def main():
    """Invoke when run directly as a program.

    Runs ``iCount demultiplex`` on the reads file with the barcodes taken
    from the annotation file, then prints an export line and an
    ``upload-fastq-single`` run command for every file in the current
    directory whose name starts with ``demux``.
    """
    args = parse_arguments()

    annotations = _tsv_to_dict(args.annotation_file)
    barcodes = list(annotations.keys())

    # Collect the distinct "3' adapter" values; exactly one is accepted.
    adapter_specs = list({annotations[barcode]["3' adapter"] for barcode in barcodes})
    if len(adapter_specs) != 1:
        print('{"proc.error":"Only one adapter type is allowed per demultiplexing job."}')
        exit(1)

    # Each comma-separated entry contributes the text before its first
    # underscore; the pieces are concatenated without a separator.
    adapter_pieces = []
    for entry in adapter_specs[0].split(','):
        adapter_pieces.append(entry.split('_')[0])
    adapter = ''.join(adapter_pieces)

    command = ['iCount', 'demultiplex', args.reads_file, adapter] + barcodes
    process = Popen(command, stdout=PIPE, stderr=DEVNULL)
    # Drain stdout and wait for iCount to finish; its output is not inspected.
    process.communicate()

    for demux_file in os.listdir('.'):
        if not demux_file.startswith('demux'):
            continue

        print(export(demux_file))
        descriptor = {
            'process': 'upload-fastq-single',
            'input': {
                'src': [demux_file]
            }
        }
        print('run {}'.format(json.dumps(descriptor, separators=(',', ':'))))
def main():
    """Invoked when run directly as a program.

    Reads the gzipped GAF file, groups the values of column 2 (index 1) by
    the value of column 5 (index 4), writes one ``<term>.tab`` file per
    group (colons removed from the term) and prints an export line plus an
    ``upload-geneset`` run command for each file. Finally reports the number
    of groups as ``num_genesets``.
    """
    args = parse_arguments()

    genes_by_term = defaultdict(list)
    with gzopen(args.gaf_file) as gaf:
        for line in gaf:
            # Lines starting with '!' are skipped (presumably GAF header/comment lines).
            if not line.startswith('!'):
                columns = line.strip().split('\t')
                genes_by_term[columns[4]].append(columns[1])

    for go_term, genes in genes_by_term.items():
        outfile = '{}.tab'.format(str(go_term).replace(':', ''))
        with open(outfile, "wt") as handle:
            writer = csv.writer(handle, delimiter=str('\t'), lineterminator='\n')
            # One gene per row, single column.
            writer.writerows([gene] for gene in genes)

        print(export(outfile))
        descriptor = {
            'process': 'upload-geneset',
            'input': {
                'src': outfile,
                'source': args.source
            }
        }
        print('run {}'.format(json.dumps(descriptor, separators=(',', ':'))))

    print('{{"num_genesets":{}}}'.format(len(genes_by_term)))
# Export every sample column of the expression table as its own
# two-column ("Gene", "Expression") tab-separated file and emit an
# "upload-expression" run command for it.
nsamples = len(exprs)

# Progress is reported roughly every tenth of the samples.
progress_step = nsamples / 10.0
progress_milestone = progress_step

# Gene IDs matching ^X[0-9] lose their leading "X"
# (presumably a prefix added by R to names starting with a digit -- confirm).
rartifact = re.compile("^X[0-9]")
gene_ids = [g[1:] if rartifact.match(g) else g for g in exprs[0]]  # pylint: disable=unsubscriptable-object

# Index 0 holds the gene IDs, so sample columns start at 1.
for i in range(1, nsamples):
    sample_id = header[i]

    # When a sample selection is given, skip samples that are not part of it.
    if var_samples is not None and sample_id not in var_samples:
        continue

    fname = "temp/{}.tab".format(sample_id)
    # Text mode: csv.writer writes str, so a binary-mode file raises
    # TypeError on Python 3. On POSIX the bytes written are unchanged.
    with open(fname, "w") as tabfile:
        tabwriter = csv.writer(tabfile, delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabwriter.writerow(["Gene", "Expression"])
        tabwriter.writerows(zip(gene_ids, exprs[i]))  # pylint: disable=unsubscriptable-object

    d = {
        "process": "upload-expression",
        "input": {
            "exp": os.path.basename(fname),
            "exp_type": "Log2",
        },
    }

    if var_samples is not None:
        d["var_template"] = var_template
        d["var"] = var_samples[sample_id]

    print(export(fname))
    print("run {}".format(json.dumps(d, separators=(",", ":"))))

    if i >= progress_milestone:
        # Scale the loop position into the remaining [args.progress, 1.0] range.
        print('{{"proc.progress": {}}}'.format(args.progress + (1.0 - args.progress) * i / nsamples))
        progress_milestone += progress_step
# Write one two-column ('Gene', 'Expression') tab-separated file per sample
# and emit an 'upload-expression' run command for it. Iteration starts at 1,
# so index 0 of header/exprs is not treated as a sample.
for i in range(1, nsamples):
    sample_id = header[i]

    # When a sample selection is given, skip samples that are not part of it.
    if var_samples is not None and sample_id not in var_samples:
        continue

    fname = 'temp/{}.tab'.format(sample_id)
    # Text mode: csv.writer writes str, so a binary-mode file raises
    # TypeError on Python 3. On POSIX the bytes written are unchanged.
    with open(fname, 'w') as tabfile:
        tabwriter = csv.writer(tabfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabwriter.writerow(['Gene', 'Expression'])
        tabwriter.writerows(zip(gene_ids, exprs[i]))  # pylint: disable=unsubscriptable-object

    d = {
        'process': 'upload-expression',
        'input': {
            'exp': os.path.basename(fname),
            'exp_type': 'Log2'
        }
    }

    if var_samples is not None:
        d['var_template'] = var_template
        d['var'] = var_samples[sample_id]

    print(export(fname))
    print('run {}'.format(json.dumps(d, separators=(',', ':'))))

    if i >= progress_milestone:
        # Scale the loop position into the remaining [args.progress, 1.0] range.
        print('{{"proc.progress": {}}}'.format(args.progress + (1. - args.progress) * i / nsamples))
        progress_milestone += progress_step
def test_missing_file(self, isfile_mock):
    # export() must answer with a proc.error JSON line naming the file.
    # isfile_mock is presumably injected by a patch decorator outside this
    # view (confirm); it is not used directly here.
    expected = '{"proc.error": "Referenced file does not exist: \'foo.txt\'."}'
    actual = export('foo.txt')
    self.assertEqual(actual, expected)
def test_filename(self, isfile_mock):
    # export() of a bare file name must produce the "export <name>" command.
    # isfile_mock is presumably injected by a patch decorator outside this
    # view (confirm); it is not used directly here.
    command = export('foo.txt')
    self.assertEqual(command, 'export foo.txt')
for f in files: files[f].close() return filenames filenames = read_multiplexed(reads1, reads2, args.barcodes, pool_maps, args.progress_start) for name in filenames: if reads2: if name.endswith('_mate2.fq.gz'): continue name2 = name.replace('_mate1', '_mate2') print(export(name)) print(export(name2)) d = { 'process': 'upload-fastq-paired', 'input': { 'src1': [name], 'src2': [name2] } } else: print(export(name)) d = {'process': 'upload-fastq-single', 'input': {'src': [name]}} print('run {}'.format(json.dumps(d, separators=(',', ':'))))
def test_missing_file(self, isfile_mock):
    # The result of export() is compared against the proc.error JSON line
    # that names the missing file. isfile_mock is not used in the body
    # (presumably supplied by a patch decorator outside this view).
    error_line = '{"proc.error": "Referenced file does not exist: \'foo.txt\'."}'
    result = export('foo.txt')
    self.assertEqual(result, error_line)
for f in files: files[f].close() return filenames filenames = read_multiplexed(reads1, reads2, args.barcodes, pool_maps, args.progress_start) for name in filenames: if reads2: if name.endswith('_mate2.fq.gz'): continue name2 = name.replace('_mate1', '_mate2') print(export(name)) print(export(name2)) d = { 'process': 'upload-fastq-paired', 'input': { 'src1': [name], 'src2': [name2] } } else: print(export(name)) d = { 'process': 'upload-fastq-single', 'input': { 'src': [name] }