def async_run_enrich(specie, genefile):
    """Run the enrichment script on a gene list and return its result table.

    A working directory with a random name (``randSeq(k=5)``) is created
    under ``UPLOAD_FOLDER/enrich``.  The gene list is placed in it, the
    command ``{KOBAS_PATH} {ENRICH_SCRIPT} -s ... -g ... -o ...`` is run via
    ``processor.shRun`` and ``enrich.txt`` is read back from that directory.

    Args:
        specie: value passed (single-quoted) to the script's ``-s`` option.
        genefile: either a list of strings, written one per line to
            ``tmp.gene.list``, or the name of a file already present in
            ``UPLOAD_FOLDER/enrich``, which is moved into the working
            directory.

    Returns:
        ``{'task': 'enrich', 'result': {'header', 'body', 'href'}}``.
        ``header`` and every ``body`` row keep columns 0-6 and 9 onwards of
        the tab-separated file, ``href`` collects column 8 of each data row
        and column 7 is dropped.  All three are empty lists when
        ``enrich.txt`` cannot be opened or read.

    NOTE(review): the commands are assembled by string formatting without
    shell escaping; ``specie`` and ``genefile`` must be trusted or validated
    by the caller.
    """
    tmp_dir = randSeq(k=5)
    enrich_dir = os.path.join(UPLOAD_FOLDER, 'enrich', tmp_dir)
    processor.Run("mkdir {tmpDir}".format(tmpDir=enrich_dir))

    if isinstance(genefile, list):
        filepath = os.path.join(enrich_dir, 'tmp.gene.list')
        # The context manager closes the file even if a write fails.
        with open(filepath, 'w') as gene_fh:
            for each in genefile:
                gene_fh.write(each + '\n')
    else:
        filepath = os.path.join(enrich_dir, genefile)
        processor.Run("mv {gene_file} {tmpDir}".format(
            gene_file=os.path.join(UPLOAD_FOLDER, 'enrich', genefile),
            tmpDir=enrich_dir))

    cmd = ("{pypath} {script} "
           "-s '{specie}' "
           "-g {genelist} "
           "-o {dir}").format(pypath=KOBAS_PATH,
                              script=ENRICH_SCRIPT,
                              specie=specie,
                              genelist=filepath,
                              dir=enrich_dir)
    processor.shRun(cmd)

    href = []
    body = []
    try:
        with open(os.path.join(enrich_dir, 'enrich.txt'), 'r') as result_fh:
            head = result_fh.readline().strip().split('\t')
            head = head[:7] + head[9:]
            for row in result_fh:
                stripped = row.strip()
                if not stripped:
                    # A blank (e.g. trailing) line has no column 8; skip it
                    # instead of failing with IndexError.
                    continue
                row_list = stripped.split('\t')
                href.append(row_list[8])
                body.append(row_list[:7] + row_list[9:])
    except IOError as e:
        print(e)
        return {
            'task': 'enrich',
            'result': {
                'header': [],
                'body': [],
                'href': []
            }
        }

    # NOTE: the working directory is intentionally left in place; nothing
    # here removes it.
    return {
        'task': 'enrich',
        'result': {
            'header': head,
            'body': body,
            'href': href
        }
    }
def run_annotation(vcf_file, annotation_database):
    """Annotate an uploaded VCF and bundle the outputs into a zip archive.

    The output prefix is ``<unix-timestamp>-<vcf_file up to '.vcf.gz'>``.
    ``vcf_ann_script`` is invoked through ``processor.shRun`` with the input
    file, the annotation database and the prefix (all paths inside
    ``UPLOAD_FOLDER/vcf_ann``); afterwards every ``<prefix>.ann.vcf.*`` file
    is zipped through ``processor.Run``.

    Args:
        vcf_file: file name of the VCF inside ``UPLOAD_FOLDER/vcf_ann``.
        annotation_database: passed through unchanged as the script's second
            argument.

    Returns:
        The archive's file name (``<prefix>.zip``), without directory.
    """
    timestamp = str(int(time.time()))
    stem = vcf_file.split('.vcf.gz')[0]
    annotation_prefix = '-'.join([timestamp, stem])

    ann_dir = os.path.join(UPLOAD_FOLDER, 'vcf_ann')
    input_path = os.path.join(ann_dir, vcf_file)
    output_prefix = os.path.join(ann_dir, annotation_prefix)

    annotate_cmd = "{script} {vcf_file} {annotation_database} {prefix}".format(
        script=vcf_ann_script,
        vcf_file=input_path,
        annotation_database=annotation_database,
        prefix=output_prefix,
    )
    processor.shRun(annotate_cmd)

    archive_name = annotation_prefix + '.zip'
    zip_cmd = "zip {zipfile} {files}".format(
        zipfile=os.path.join(ann_dir, archive_name),
        files=output_prefix + '.ann.vcf.*',
    )
    processor.Run(zip_cmd)
    return archive_name
def fetch_vcf_samples(vcf, vcf_type="WES"):
    """Extract the sample names of a VCF and split it into per-sample files.

    Steps, in order:
      1. run ``extract_vcf_sample_script`` on ``vcf``;
      2. read ``<Config.VCF_FILE_PATH>/<vcf>.sample_name``, one name per line
         (presumably written by step 1 -- confirm against the script);
      3. call ``tc_map(vcf, vcf_type, samples)``;
      4. run ``split_vcf_sample_script`` with ``vcf`` and ``<vcf>.idmap``.

    Args:
        vcf: VCF name handed to both scripts and used to locate the
            ``.sample_name`` file.
        vcf_type: forwarded to ``tc_map``; defaults to ``"WES"``.

    Returns:
        Whatever ``tc_map`` returned.
    """
    # Step 1: list the samples contained in the VCF.
    fetch_sample_cmd = "sh {script} {vcf}".format(
        script=extract_vcf_sample_script, vcf=vcf)
    processor.shRun(fetch_sample_cmd)

    # Step 2: the context manager closes the file even if reading fails.
    sample_file = os.path.join(Config.VCF_FILE_PATH, vcf + '.sample_name')
    with open(sample_file, 'r') as sample_fh:
        samples = [each.strip() for each in sample_fh]

    tc_series = tc_map(vcf, vcf_type, samples)

    # Step 4: split the VCF into one file per sample.
    split_vcf_cmd = "sh {script} {vcf} {map_file}".format(
        script=split_vcf_sample_script, vcf=vcf, map_file=vcf + '.idmap')
    processor.shRun(split_vcf_cmd)
    return tc_series
def async_fetch_vcf_samples(vcf, username, vcf_type="WES"):
    """Register the samples of an uploaded VCF and split it per sample.

    Steps, in order:
      1. run ``extract_vcf_sample_script`` on ``vcf``;
      2. read ``<Config.VCF_FILE_PATH>/<vcf>.sample_name``, one name per line
         (presumably written by step 1 -- confirm against the script);
      3. call ``tc_map(vcf, vcf_type, samples)``;
      4. save one ``Data`` row per entry returned by ``tc_map``;
      5. run ``split_vcf_sample_script`` with ``vcf`` and ``<vcf>.idmap``.

    Args:
        vcf: VCF name handed to both scripts and used to locate the
            ``.sample_name`` file.
        username: stored as ``provider`` on every created ``Data`` row.
        vcf_type: forwarded to ``tc_map`` and stored as ``type`` on every
            row; defaults to ``"WES"``.

    Returns:
        ``{'task': 'vcf_upload', 'result': '<vcf> upload success...'}``.
    """
    # Step 1: list the samples contained in the VCF.
    fetch_sample_cmd = "sh {script} {vcf}".format(
        script=extract_vcf_sample_script, vcf=vcf)
    processor.shRun(fetch_sample_cmd)

    # Step 2: the context manager closes the file even if reading fails.
    sample_file = os.path.join(Config.VCF_FILE_PATH, vcf + '.sample_name')
    with open(sample_file, 'r') as sample_fh:
        samples = [each.strip() for each in sample_fh]

    tc_series = tc_map(vcf, vcf_type, samples)

    # Step 4: create one database record per sample; element 0 of each entry
    # is used as tc_id and element 1 as sample_name.
    for each in tc_series:
        row = Data(tc_id=each[0],
                   provider=username,
                   sample_name=each[1],
                   type=vcf_type)
        row.save()

    # Step 5: split the VCF into one file per sample.
    split_vcf_cmd = "sh {script} {vcf} {map_file}".format(
        script=split_vcf_sample_script, vcf=vcf, map_file=vcf + '.idmap')
    processor.shRun(split_vcf_cmd)
    return {
        'task': 'vcf_upload',
        'result': '{0} upload success...'.format(vcf)
    }
def snp_info(info):
    """Query SNP information for one group, by gene list or by region.

    Builds the JSON parameter object for the ``snpInf`` command, runs it via
    ``processor.shRun`` and splits the returned lines on tabs.

    Args:
        info: mapping with key ``'group'`` and either a non-empty
            ``'gene_list'`` (parsed with ``parseInput``) or the keys
            ``'chr'``, ``'pos_start'`` and ``'pos_end'``.

    Returns:
        ``{'task': 'snp_info', 'result': {'header': [...], 'body': [...]}}``.
        ``header`` is the first output line split on tabs and ``body`` the
        remaining lines split the same way.  Both are empty when the parsed
        gene list is empty or the command yields no output.

    Raises:
        KeyError: if ``'group'`` is missing, or if no gene list is given and
            one of the region keys is missing.
    """
    tmp_param = {
        'not_a_group_id': info['group'],
        'group_names': ["not_a_group_id"]
    }
    genes = info.get('gene_list')
    if genes:
        gene_list = parseInput(genes)
        if not gene_list:
            return {'task': 'snp_info', 'result': {'header': [], 'body': []}}
        tmp_param.update({'gene_id': gene_list})
    else:
        tmp_param.update({
            'chrom': info['chr'],
            'chrom_start': info['pos_start'],
            'chrom_end': info['pos_end']
        })

    # The JSON is embedded inside single quotes on a shell command line, and
    # json.dumps leaves "'" untouched.  Escape every single quote as '"'"' so
    # request-supplied values can neither break the quoting nor inject
    # additional shell commands.
    param = json.dumps(tmp_param).replace("'", "'\"'\"'")
    cmd = ("snpInf "
           "--gene_bed {gene_bed} "
           "--vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl "
           "--vcf_dir {vcf_table} "
           "--outdir {outdir} "
           "--parameters '{param}'").format(
               gene_bed=gene_bed_file,
               vcf_table=Config.VCF_TABLE_PATH,
               param=param,
               outdir=ANN_PATH,
           )
    print(cmd)
    result = processor.shRun(cmd)
    if result:
        head_data = result[0].split('\t')
        body_data = [row.split('\t') for row in result[1:]]
        return {
            'task': 'snp_info',
            'result': {
                'header': head_data,
                'body': body_data
            }
        }
    return {'task': 'snp_info', 'result': {'header': [], 'body': []}}
def compare_info(info):
    """Run the ``snpInf`` command with caller-supplied parameters.

    ``info`` is serialised with ``json.dumps`` and passed as the command's
    ``--parameters`` value; the returned lines are split on tabs.

    Args:
        info: JSON-serialisable mapping.  If it has a truthy ``'gene_id'``
            entry, that entry is parsed with ``parseInput`` and replaced
            in place by the parsed list, so the caller's dict is modified.

    Returns:
        ``{'task': 'compare_info', 'result': {'header': [...],
        'body': [...]}}``.  ``header`` is the first output line split on tabs
        and ``body`` the remaining lines split the same way.  Both are empty
        when the parsed gene list is empty or the command yields no output.
    """
    genes = info.get('gene_id')
    if genes:
        gene_list = parseInput(genes)
        if not gene_list:
            return {
                'task': 'compare_info',
                'result': {
                    'header': [],
                    'body': []
                }
            }
        info['gene_id'] = gene_list

    # The JSON is embedded inside single quotes on a shell command line, and
    # json.dumps leaves "'" untouched.  Escape every single quote as '"'"' so
    # request-supplied values can neither break the quoting nor inject
    # additional shell commands.
    param = json.dumps(info).replace("'", "'\"'\"'")
    cmd = ("snpInf "
           "--gene_bed {gene_bed} "
           "--vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl "
           "--vcf_dir {vcf_table} "
           "--outdir {outdir} "
           "--parameters '{param}'").format(
               gene_bed=gene_bed_file,
               vcf_table=Config.VCF_TABLE_PATH,
               param=param,
               outdir=ANN_PATH,
           )
    result = processor.shRun(cmd)
    if result:
        head_data = result[0].split('\t')
        body_data = [row.split('\t') for row in result[1:]]
        return {
            'task': 'compare_info',
            'result': {
                'header': head_data,
                'body': body_data
            }
        }
    return {'task': 'compare_info', 'result': {'header': [], 'body': []}}
def run_bsa(info):
    """Run ``snpScore`` and report where its plots and zipped results are.

    ``info`` is passed (single-quoted) as the ``-p`` argument.  The first
    element returned by ``processor.shRun`` is used as the result base
    directory; its ``results`` sub-directory is zipped to ``results.zip``
    next to it and scanned for plot images.

    Args:
        info: value substituted into the ``-p '...'`` argument.

    Returns:
        ``{'task': 'bsa', 'result': {'path': ..., 'files': [...]}}`` where
        ``files`` lists the ``*snp.freq.plot.jpg`` entries followed by the
        ``*var.score.plot.jpg`` entries, and all paths have everything up to
        and including ``/home/app/vcfweb/wheatdb/app`` removed.
    """
    app_root = '/home/app/vcfweb/wheatdb/app'
    plot_suffixes = ('snp.freq.plot.jpg', 'var.score.plot.jpg')

    print(info)
    cmd = "snpScore -p '{info}' -d {vcf_dir} -o {out_dir} --vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl".format(
        info=info, vcf_dir=Config.VCF_TABLE_PATH, out_dir=MAPPING_PATH)
    # test
    print(cmd)
    output = processor.shRun(cmd)

    result_base = output[0]
    result_path = os.path.join(result_base, 'results')
    print(result_path)

    # Build the archive command once; it is both executed and echoed.
    zip_cmd = "cd {dir} && zip -r {zip_file} results".format(
        zip_file=os.path.join(result_base, 'results.zip'), dir=result_base)
    processor.Run(cmd=zip_cmd)
    print(zip_cmd)

    entries = os.listdir(result_path)
    path = result_path.split(app_root)[-1]
    # The outer loop runs over the suffixes so that all frequency plots come
    # before all score plots, each group in directory-listing order.
    plot_files = [
        os.path.join(path, name)
        for suffix in plot_suffixes
        for name in entries
        if name.endswith(suffix)
    ]
    return {
        'task': 'bsa',
        'result': {
            'path': os.path.join(path, '../results.zip'),
            'files': plot_files
        }
    }