def pre_process():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if 'reference' not in meta:
                meta['reference'] = plan['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file)
    api.patch("contentupload", args.upload_id, meta=meta)
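# NOTE (illustrative sketch): both pre_process() variants rely on an
# unzip_archive() helper defined elsewhere in the publisher module and not
# shown here. The sketch below is an assumption about its behaviour: extract
# the ZIP into the upload directory and return the member names, which the
# callers then inspect for "plan.json" or a single BED/VCF file. The real
# Torrent Suite helper may differ, so this stand-in uses a distinct name.
def _unzip_archive_sketch(dest_dir, archive_path):
    import os
    import zipfile

    with zipfile.ZipFile(archive_path) as archive:
        names = archive.namelist()
        # Refuse members that would be extracted outside dest_dir.
        real_dest = os.path.realpath(dest_dir)
        for name in names:
            target = os.path.realpath(os.path.join(dest_dir, name))
            if not (target == real_dest or target.startswith(real_dest + os.sep)):
                raise ValueError("Archive member escapes destination: %s" % name)
        archive.extractall(dest_dir)
    return names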
def pre_process():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta['hotspot']:
        # convert vcf to bed
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % files[0]
        convert_command += ' --output-bed %s' % target_filename
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")), parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
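# NOTE (illustrative sketch): this revision loads the meta JSON with
# parse_float=Decimal and writes it back with cls=JSONEncoder, so the encoder
# class imported by the module must know how to serialize Decimal values.
# The class below is only a sketch of a compatible encoder, not the actual
# JSONEncoder used by the publisher, hence the distinct name.
class _DecimalJSONEncoderSketch(json.JSONEncoder):

    def default(self, obj):
        if isinstance(obj, Decimal):
            # Emit Decimals as plain JSON numbers; a real implementation might
            # prefer str(obj) to avoid any float rounding.
            return float(obj)
        return json.JSONEncoder.default(self, obj)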
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    files = meta.get('pre_process_files')

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Target regions file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Hotspots file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content: Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        #    api.post('log', upload='/rundb/api/v1/contentupload/%s/' %
        #             str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print "Content: AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(err)
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            #raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            #raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    ''' === Validate and Register === '''

    primary_path = None
    secondary_path = None

    if is_BED_encrypted(meta):
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

        meta["hotspot"] = False
        if target_regions_bed and not primary_path:
            register_bed_file(args.upload_id, args.path, meta, target_regions_bed)

        if hotspots_bed:
            meta["hotspot"] = True
            if not secondary_path:
                register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not (is_BED_encrypted(meta)):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type and (run_type == "AMPS_RNA"):
                    meta['reference'] = None

            plan_prototype, alignmentargs_override = plan_json(meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)

            if not success:
                api.patch("contentupload", args.upload_id, status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for k in error_json:
                        for j in range(len(error_json[k])):
                            err_message = str(error_json[k][j])
                            err_message = err_message.replace("&gt;", ">")
                            error_message_array.append(err_message)
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)

            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict["id"],
                          alignmentargs=alignmentargs_override,
                          thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise

    api.update_meta(meta, args)
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path, files[0]))
        subprocess.call(cmd, shell=True)
    else:
        files = [args.upload_file]

    ''' Establish the upload type '''

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: missing json key " + str(err))
            raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")

    ''' === Validate and Register === '''

    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    print "Uploaded file: " + os.path.basename(args.upload_file)

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed: Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed: Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
    else:
        print "Compressed: No"
        files = [args.upload_file]

    ''' Establish the upload type '''

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Target regions file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Hotspots file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content: Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        #    api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print "Content: AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(err)
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            #raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            #raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    ''' === Validate and Register === '''

    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)
            if not success:
                api.patch("contentupload", args.upload_id, status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    files = meta.get('pre_process_files')

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        print "Content: Target regions file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        print "Content: Hotspots file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content: Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        #    api.post('log', upload='/rundb/api/v1/contentupload/%s/' %
        #             str(args.upload_id), text=line.strip())
    elif "plan.json" in files:
        # Call the validation script from ampliseq: validate reference, target bed and hotspot.
        print "Content: AmpliSeq ZIP\n"
        meta['is_ampliseq'] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(meta, args)
        api.update_meta(meta, args)
        """
        If the reference named in plan.json ("genome_reference") is not installed on the TS:
          - wait for the reference to be installed
          - the subtask finish_me is called at the end of the reference install
          - processing then restarts validation of the upload
        """
        if isRefInstallInProgress:
            return

        target_regions_bed = meta['design']['plan'].get('designed_bed', '')
        hotspots_bed = meta['design']['plan'].get('hotspot_bed', '')
        sse_bed = meta['design']['plan'].get('sse_bed', '')
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    ''' === Validate and Register === '''

    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""
    isBED_Encrypted = is_BED_encrypted(meta)

    if target_regions_bed:
        if isBED_Encrypted:
            meta['design']['plan']['designed_bed'] = ''
        else:
            target_regions_bed_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(args.upload_id, args.path, meta, target_regions_bed)

    if hotspots_bed:
        if isBED_Encrypted:
            meta['design']['plan']['hotspot_bed'] = ''
        else:
            hotspots_bed_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if sse_bed:
        if isBED_Encrypted:
            meta['design']['plan']['sse_bed'] = ''
        else:
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed, 'SSE BED')
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path, meta, sse_bed)

    if meta['is_ampliseq']:
        if isBED_Encrypted:
            run_type = meta['design']['plan'].get('runType', '')
            if run_type == "AMPS_RNA":
                meta['reference'] = None
            api.update_meta(meta, args)
        # parse, process and convert the ampliseq plan.json to a TS-supported plan and post it
        success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta, args, target_regions_bed_path, hotspots_bed_path, sse_bed_path)
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." % errMsg)
            raise Exception(errMsg)
    else:
        api.update_meta(meta, args)
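# NOTE (illustrative sketch): the newer revisions replace the inline
# "rewrite the meta file, then PATCH the contentupload record" sequence used in
# the older main() versions with a single api.update_meta(meta, args) call.
# Based on that inline pattern, a compatible helper could look like the sketch
# below; the actual helper in the api module may differ. `api` and
# `JSONEncoder` here are the same module-level objects used throughout this
# script.
def _update_meta_sketch(meta, args):
    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)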
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument("upload_id", type=int)
    parse.add_argument("path")
    parse.add_argument("upload_file")
    parse.add_argument("meta_file")

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    files = meta.get("pre_process_files")

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta["is_ampliseq"] = False

    if len(files) == 1 and files[0].endswith(".bed") and meta.get("hotspot", False) == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content: Target regions file in BED format")
        print()
    elif len(files) == 1 and files[0].endswith(".bed") and meta.get("hotspot", False) == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content: Hotspots file in BED format")
        print()
    elif len(files) == 1 and files[0].endswith(".vcf") and meta.get("hotspot", False) == True:
        print("Content: Hotspots file in VCF format")
        print()
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print()
        hotspots_bed = os.path.basename(files[0]) + ".bed"
        convert_command = "/usr/local/bin/tvcutils prepare_hotspots"
        convert_command += " --input-vcf %s" % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += " --output-bed %s" % os.path.join(args.path, hotspots_bed)
        convert_command += " --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta" % (
            meta["reference"], meta["reference"])
        convert_command += " --filter-bypass on"
        p = subprocess.Popen(
            convert_command,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            shell=True,
        )
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)
        # process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        # for line in process.communicate()[0].splitlines():
        #     api.post('log', upload='/rundb/api/v1/contentupload/%s/' %
        #              str(args.upload_id), text=line.strip())
    elif "plan.json" in files:
        # Call the validation script from ampliseq. validate reference, target bed and hotspot
        print("Content: AmpliSeq ZIP\n")
        meta["is_ampliseq"] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(meta, args)
        api.update_meta(meta, args)
        """
        If reference mentioned in the plan.json (get the info from "genome_reference")
        is not installed in the TS,
          - wait for the ref to be installed
          - the subtask finish_me will be called at the end of the reference install
          - process to restart validation of the upload
        """
        if isRefInstallInProgress:
            return

        target_regions_bed = meta["design"]["plan"].get("designed_bed", "")
        hotspots_bed = meta["design"]["plan"].get("hotspot_bed", "")
        sse_bed = meta["design"]["plan"].get("sse_bed", "")
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print()
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    """ === Validate and Register === """

    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""
    isBED_Encrypted = is_BED_encrypted(meta)

    if target_regions_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["designed_bed"] = ""
        else:
            bed_type = publisher_types.TARGET
            target_regions_bed_path = validate(args.upload_id, args.path, meta, target_regions_bed, bed_type)
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(args.upload_id, args.path, meta, target_regions_bed, bed_type)

    if hotspots_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["hotspot_bed"] = ""
        else:
            bed_type = publisher_types.HOTSPOT
            hotspots_bed_path = validate(args.upload_id, args.path, meta, hotspots_bed, bed_type)
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id, args.path, meta, hotspots_bed, bed_type)

    if sse_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["sse_bed"] = ""
        else:
            bed_type = publisher_types.SSE
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed, bed_type)
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path, meta, sse_bed, bed_type)

    if meta["is_ampliseq"]:
        # parse, process and convert the ampliseq plan.json to TS supported plan and post
        success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta,
            args,
            target_regions_bed_path,
            hotspots_bed_path,
            sse_bed_path,
            isBED_Encrypted=isBED_Encrypted,
        )
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." % str(errMsg))
            raise Exception("validation error")
        if success:
            print("Plan Template created successfully.")
    else:
        api.update_meta(meta, args)