Esempio n. 1
0
def main():
    """Demultiplex a reads file with iCount and queue an upload per output.

    The annotation table named by ``args.annotation_file`` is loaded with
    ``_tsv_to_dict``; its keys are used as the barcodes. All barcodes must
    share one ``3' adapter`` value, otherwise a ``proc.error`` message is
    printed and the program exits with status 1.

    After ``iCount demultiplex`` has run, every entry of the current
    directory whose name starts with ``demux`` is exported and a ``run``
    line for the ``upload-fastq-single`` process is printed for it.
    """
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # use; ``sys.exit`` is the supported way to terminate a program.
    import sys

    args = parse_arguments()

    demux_data = _tsv_to_dict(args.annotation_file)
    barcodes = list(demux_data.keys())

    # Distinct adapter specifications found across all barcodes.
    adapters = {demux_data[barcode]["3' adapter"] for barcode in barcodes}
    if len(adapters) != 1:
        print('{"proc.error":"Only one adapter type is allowed per demultiplexing job."}')
        sys.exit(1)

    (adapter_spec,) = adapters
    # The specification is a comma-separated list; only the part of each item
    # before the first underscore is kept, and the parts are concatenated.
    adapter = ''.join(part.split('_')[0] for part in adapter_spec.split(','))

    process = Popen(['iCount', 'demultiplex', args.reads_file, adapter] + barcodes,
                    stdout=PIPE,
                    stderr=DEVNULL)
    # Drain the stdout pipe and wait for iCount to finish; its output is not used.
    # NOTE(review): the return code is not inspected, so a failed run simply
    # produces no ``demux*`` files -- confirm whether that is intended.
    process.communicate()

    demux_files = [f for f in os.listdir('.') if f.startswith('demux')]

    for demux_file in demux_files:
        print(export(demux_file))
        d = {
            'process': 'upload-fastq-single',
            'input': {
                'src': [demux_file]
            }
        }

        print('run {}'.format(json.dumps(d, separators=(',', ':'))))
Esempio n. 2
0
def main():
    """Split a GAF file into one gene set file per term and queue uploads.

    Every non-comment line of ``args.gaf_file`` is split on tabs; the fifth
    column (index 4) is used as the term key and the second column (index 1)
    as the gene identifier (in the GAF format these are the GO ID and the
    DB Object ID columns). For each term a ``<term without colons>.tab`` file
    with one gene per line is written, exported, and announced with a ``run``
    line for the ``upload-geneset`` process. Finally the number of gene sets
    is printed as ``num_genesets``.
    """
    args = parse_arguments()

    terms = defaultdict(list)

    with gzopen(args.gaf_file) as gaf:
        for line in gaf:
            # Lines starting with '!' are header/comment lines; blank lines
            # carry no columns and would otherwise raise IndexError below.
            if line.startswith('!') or not line.strip():
                continue
            go_data = line.strip().split('\t')
            terms[go_data[4]].append(go_data[1])

    for go_term, genes in terms.items():
        outfile = '{}.tab'.format(str(go_term).replace(':', ''))
        with open(outfile, "wt") as f:
            writer = csv.writer(f, delimiter=str('\t'), lineterminator='\n')
            writer.writerows([gene] for gene in genes)

        # Announce the file only after it has been flushed and closed, so a
        # consumer reacting to the export line never sees a partial file.
        print(export(outfile))
        d = {
            'process': 'upload-geneset',
            'input': {
                'src': outfile,
                'source': args.source
            }
        }

        print('run {}'.format(json.dumps(d, separators=(',', ':'))))

    print('{{"num_genesets":{}}}'.format(len(terms)))
Esempio n. 3
0
def main():
    """Split a GAF file into one gene set file per term and queue uploads.

    Every non-comment line of ``args.gaf_file`` is split on tabs; the fifth
    column (index 4) is used as the term key and the second column (index 1)
    as the gene identifier (in the GAF format these are the GO ID and the
    DB Object ID columns). For each term a ``<term without colons>.tab`` file
    with one gene per line is written, exported, and announced with a ``run``
    line for the ``upload-geneset`` process. Finally the number of gene sets
    is printed as ``num_genesets``.
    """
    args = parse_arguments()

    terms = defaultdict(list)

    with gzopen(args.gaf_file) as gaf:
        for line in gaf:
            # Lines starting with '!' are header/comment lines; blank lines
            # carry no columns and would otherwise raise IndexError below.
            if line.startswith('!') or not line.strip():
                continue
            go_data = line.strip().split('\t')
            terms[go_data[4]].append(go_data[1])

    for go_term, genes in terms.items():
        outfile = '{}.tab'.format(str(go_term).replace(':', ''))
        with open(outfile, "wt") as f:
            writer = csv.writer(f, delimiter=str('\t'), lineterminator='\n')
            writer.writerows([gene] for gene in genes)

        # Announce the file only after it has been flushed and closed, so a
        # consumer reacting to the export line never sees a partial file.
        print(export(outfile))
        d = {
            'process': 'upload-geneset',
            'input': {
                'src': outfile,
                'source': args.source
            }
        }

        print('run {}'.format(json.dumps(d, separators=(',', ':'))))

    print('{{"num_genesets":{}}}'.format(len(terms)))
Esempio n. 4
0
# Write one "Gene / Expression" table per row of ``exprs`` (rows 1..n-1),
# export it and queue an ``upload-expression`` run for it.

# Number of rows in ``exprs``; row 0 supplies the gene identifiers below.
nsamples = len(exprs)
# A progress message is emitted each time another tenth of the rows is passed.
progress_step = nsamples / 10.0
progress_milestone = progress_step
# Identifiers that start with "X" followed by a digit lose the leading "X"
# (presumably undoing a name-mangling artifact -- TODO confirm the origin).
rartifact = re.compile("^X[0-9]")
gene_ids = [g[1:] if rartifact.match(g) else g for g in exprs[0]]  # pylint: disable=unsubscriptable-object

for i in range(1, nsamples):
    sample_id = header[i]
    # When a sample-annotation mapping is given, skip samples it does not list.
    if var_samples is not None and sample_id not in var_samples:
        continue

    fname = "temp/{}.tab".format(sample_id)

    # NOTE(review): csv.writer on a file opened in "wb" works on Python 2 only;
    # on Python 3 the csv module expects text mode with newline="" -- confirm
    # which interpreter runs this script.
    with open(fname, "wb") as tabfile:
        tabwriter = csv.writer(tabfile, delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabwriter.writerow(["Gene", "Expression"])
        tabwriter.writerows(zip(gene_ids, exprs[i]))  # pylint: disable=unsubscriptable-object

    # The run request refers to the file by base name, without the temp/ prefix.
    d = {"process": "upload-expression", "input": {"exp": os.path.basename(fname), "exp_type": "Log2"}}

    if var_samples is not None:
        d["var_template"] = var_template
        d["var"] = var_samples[sample_id]

    print(export(fname))
    print("run {}".format(json.dumps(d, separators=(",", ":"))))

    # Report progress scaled into the interval from args.progress up to 1.
    if i >= progress_milestone:
        print('{{"proc.progress": {}}}'.format(args.progress + (1.0 - args.progress) * i / nsamples))
        progress_milestone += progress_step
Esempio n. 5
0
# Write one "Gene / Expression" table per row of ``exprs`` (rows 1..n-1),
# export it and queue an ``upload-expression`` run for it.
for i in range(1, nsamples):
    sample_id = header[i]
    # When a sample-annotation mapping is given, skip samples it does not list.
    if var_samples is not None and sample_id not in var_samples:
        continue

    fname = 'temp/{}.tab'.format(sample_id)

    # NOTE(review): csv.writer on a file opened in 'wb' works on Python 2 only;
    # on Python 3 the csv module expects text mode with newline='' -- confirm
    # which interpreter runs this script.
    with open(fname, 'wb') as tabfile:
        tabwriter = csv.writer(tabfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabwriter.writerow(['Gene', 'Expression'])
        tabwriter.writerows(zip(gene_ids, exprs[i]))  # pylint: disable=unsubscriptable-object

    # The run request refers to the file by base name, without the temp/ prefix.
    d = {
        'process': 'upload-expression',
        'input': {
            'exp': os.path.basename(fname),
            'exp_type': 'Log2'
        }
    }

    if var_samples is not None:
        d['var_template'] = var_template
        d['var'] = var_samples[sample_id]

    print(export(fname))
    print('run {}'.format(json.dumps(d, separators=(',', ':'))))

    # Report progress scaled into the interval from args.progress up to 1.
    if i >= progress_milestone:
        print('{{"proc.progress": {}}}'.format(args.progress + (1. - args.progress) * i / nsamples))
        progress_milestone += progress_step
 def test_missing_file(self, isfile_mock):
     # export() is expected to return a proc.error message for 'foo.txt' here.
     # isfile_mock is injected by a decorator outside this block (presumably a
     # patched os.path.isfile reporting the file as missing -- TODO confirm).
     actual = export('foo.txt')
     expected = '{"proc.error": "Referenced file does not exist: \'foo.txt\'."}'
     self.assertEqual(actual, expected)
 def test_filename(self, isfile_mock):
     # export() is expected to return the plain export command for 'foo.txt'.
     # isfile_mock is injected by a decorator outside this block.
     command = export('foo.txt')
     self.assertEqual(command, 'export foo.txt')
Esempio n. 8
0
        for f in files:
            files[f].close()

    return filenames


# Demultiplex the reads; the returned file names drive the uploads below.
filenames = read_multiplexed(
    reads1, reads2, args.barcodes, pool_maps, args.progress_start)

for name in filenames:
    if not reads2:
        # No second reads file: every output file is uploaded on its own.
        print(export(name))
        d = {
            'process': 'upload-fastq-single',
            'input': {
                'src': [name]
            }
        }
    elif name.endswith('_mate2.fq.gz'):
        # Second-mate files are handled together with their first mate.
        continue
    else:
        # The partner file name is derived by swapping the mate marker.
        name2 = name.replace('_mate1', '_mate2')
        print(export(name))
        print(export(name2))
        d = {'process': 'upload-fastq-paired', 'input': {'src1': [name], 'src2': [name2]}}

    print('run {}'.format(json.dumps(d, separators=(',', ':'))))
 def test_missing_file(self, isfile_mock):
     # export() is expected to return a proc.error message for 'foo.txt' here.
     # isfile_mock is injected by a decorator outside this block (presumably a
     # patched os.path.isfile reporting the file as missing -- TODO confirm).
     actual = export('foo.txt')
     expected = '{"proc.error": "Referenced file does not exist: \'foo.txt\'."}'
     self.assertEqual(actual, expected)
 def test_filename(self, isfile_mock):
     # export() is expected to return the plain export command for 'foo.txt'.
     # isfile_mock is injected by a decorator outside this block.
     command = export('foo.txt')
     self.assertEqual(command, 'export foo.txt')
Esempio n. 11
0
        for f in files:
            files[f].close()

    return filenames


filenames = read_multiplexed(reads1, reads2, args.barcodes, pool_maps, args.progress_start)

for name in filenames:
    if reads2:
        if name.endswith('_mate2.fq.gz'):
            continue

        name2 = name.replace('_mate1', '_mate2')
        print(export(name))
        print(export(name2))
        d = {
            'process': 'upload-fastq-paired',
            'input': {
                'src1': [name],
                'src2': [name2]
            }
        }
    else:
        print(export(name))
        d = {
            'process': 'upload-fastq-single',
            'input': {
                'src': [name]
            }