예제 #1
0
def pre_process():
    """Classify an upload as a single BED file or an AmpliSeq archive.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file`` (opened read/write).

    The JSON in ``meta_file`` is loaded and its ``is_ampliseq``,
    ``primary_bed`` and ``hotspot_bed`` keys are reset to ``None``. A zip
    ``upload_file`` is expanded with ``unzip_archive``; anything else is
    treated as a one-element file list. Then:

    * a single ``.bed`` file is recorded as ``primary_bed``;
    * a file list containing ``plan.json`` is treated as AmpliSeq: the plan
      is passed through ``ampliseq.handle_versioned_plans`` and the design,
      ``primary_bed``, ``secondary_bed`` and (if not already present)
      ``reference`` are stored in the metadata.

    The resulting metadata overwrites ``meta_file`` and is sent with
    ``api.patch("contentupload", upload_id, meta=meta)``.

    Raises:
        KeyError: the AmpliSeq design lacks ``plan``, ``designed_bed``,
            ``hotspot_bed`` or ``genome``; the upload status is patched to
            an error message before re-raising.
        ValueError: the upload is neither a single BED file nor an archive
            containing ``plan.json``.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Close the plan file deterministically instead of leaking the handle.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        try:
            # The 'plan' lookup sits inside the try so that a design without
            # a plan is also reported as a malformed archive.
            plan = design['plan']
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if 'reference' not in meta:
                meta['reference'] = plan['genome'].lower()
        except KeyError:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Overwrite the metadata file in place with the updated content.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file)
    api.patch("contentupload", args.upload_id, meta=meta)
예제 #2
0
파일: pre_process.py 프로젝트: hgy851018/TS
def pre_process():
    """Classify an upload as BED, hotspot VCF or AmpliSeq and update its metadata.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file`` (opened read/write).

    The JSON in ``meta_file`` is loaded (floats as ``Decimal``) and its
    ``is_ampliseq``, ``primary_bed`` and ``hotspot_bed`` keys are reset to
    ``None``. A zip ``upload_file`` is expanded with ``unzip_archive``;
    anything else is treated as a one-element file list. Then:

    * a single ``.bed`` file is recorded as ``primary_bed``;
    * a single ``.vcf`` file with a truthy ``hotspot`` flag is converted to
      BED with ``tvcutils prepare_hotspots``; every output line of the tool
      is posted to the upload log and the converted file becomes
      ``primary_bed``;
    * a file list containing ``plan.json`` is treated as AmpliSeq and the
      design, ``primary_bed``, ``secondary_bed`` and (if empty) ``reference``
      are stored in the metadata.

    The resulting metadata overwrites ``meta_file`` and is sent with
    ``api.patch("contentupload", upload_id, meta=meta)``.

    Raises:
        KeyError: the AmpliSeq design lacks a required key; the upload status
            is patched to an error message before re-raising.
        ValueError: the upload matches none of the supported layouts.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    # meta.get: a missing 'hotspot' key means "not a hotspot upload" rather
    # than an unrelated KeyError.
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot'):
        # Convert the hotspot VCF to BED.
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        # The command is an argument list (no shell), so characters in the
        # uploaded file name cannot be interpreted as shell syntax.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', files[0],
            '--output-bed', target_filename,
            # TODO: is this just name or full path??
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Close the plan file deterministically instead of leaking the handle.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file, parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        try:
            # The 'plan' lookup sits inside the try so that a design without
            # a plan is also reported as a malformed archive.
            plan = design['plan']
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Overwrite the metadata file in place with the updated content.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
예제 #3
0
def main():
    """Validate an upload, register its BED files and create an AmpliSeq plan.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file``. The file list is taken from
    the ``pre_process_files`` entry of the metadata JSON.

    Upload types, checked in order:

    * a single ``.bed`` with ``hotspot`` equal to False: target regions BED;
    * a single ``.bed`` with ``hotspot`` equal to True: hotspots BED;
    * a single ``.vcf`` with ``hotspot`` equal to True: converted to a
      hotspots BED with ``tvcutils prepare_hotspots``;
    * a list containing ``plan.json``: AmpliSeq archive, whose design names
      the target regions and hotspots BED files.

    Unless ``is_BED_encrypted(meta)`` is true, each BED file is passed to
    ``validate`` and, when that returns a falsy path, to
    ``register_bed_file``. For AmpliSeq uploads a plan built by ``plan_json``
    is then posted to ``plannedexperiment``. Finally the metadata is stored
    with ``api.update_meta``.

    Exits with status 1 (or the converter's return code) on unrecognized or
    malformed uploads, and re-raises any exception hit while creating the
    plan after printing it.

    Output uses single-argument ``print(...)`` calls, which produce the same
    text under Python 2 and Python 3.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing 'pre_process_files' entry is treated as "no files" so the
    # upload is reported as unrecognized instead of failing on len(None).
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    # The hotspot flag is compared with == (not truthiness) on purpose: a
    # value that is neither True nor False matches no single-file branch.
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + '.bed'
        # The command is an argument list (no shell), so characters in the
        # uploaded file name cannot be interpreted as shell syntax.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, vcf_name),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        print("Content:        AmpliSeq ZIP")
        print("")
        meta['is_ampliseq'] = True
        # Close the plan file deterministically instead of leaking the handle.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)

        meta['design'] = design

        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print("ERROR: Malformed AmpliSeq archive: missing json key " + str(err))
            sys.exit(1)

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print("ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            sys.exit(1)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            # Report the hotspots file that is actually missing.
            print("ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            sys.exit(1)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    if is_BED_encrypted(meta):
        # Encrypted BED files are neither validated nor registered; their
        # names are blanked in the design and the paths stay empty.
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

        # register_bed_file receives meta, so the hotspot flag is set to
        # match the file being registered.
        meta["hotspot"] = False
        if target_regions_bed and not primary_path:
            register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
        if hotspots_bed:
            meta["hotspot"] = True
            if not secondary_path:
                register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not is_BED_encrypted(meta):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(
                        args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(
                        args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type and (run_type == "AMPS_RNA"):
                    meta['reference'] = None
            plan_prototype, alignmentargs_override = plan_json(
                meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)

            if not success:
                api.patch("contentupload", args.upload_id, status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for key in error_json:
                        for item in error_json[key]:
                            # Un-escape the HTML entity for '>'. The previous
                            # replace('>', '>') was a no-op.
                            error_message_array.append(str(item).replace('&gt;', '>'))
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)
            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict["id"],
                          alignmentargs=alignmentargs_override,
                          thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise

    api.update_meta(meta, args)
예제 #4
0
파일: validate.py 프로젝트: LBragg/TS
def main():
    """Unpack, validate and register an upload; create a plan for AmpliSeq.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file``.

    A zip ``upload_file`` is expanded with ``file_utils.unzip_archive``, a
    ``.gz`` file is decompressed into ``path`` with ``gzip -dc``, and
    anything else is used as-is. The upload type is then established:

    * a single ``.bed`` with ``hotspot`` equal to False: target regions BED;
    * a single ``.bed`` with ``hotspot`` equal to True: hotspots BED;
    * a single ``.vcf`` with ``hotspot`` equal to True: converted to a
      hotspots BED with ``tvcutils prepare_hotspots`` (tool output is posted
      to the upload log);
    * a list containing ``plan.json``: AmpliSeq archive, whose design names
      the target regions and hotspots BED files.

    Each BED file is passed to ``validate`` and, when that returns a falsy
    path, to ``register_bed_file``. For AmpliSeq uploads a plan built by
    ``plan_json`` is posted to ``plannedexperiment``; a failure there is only
    printed. Finally the metadata is written back to ``meta_file`` and
    patched onto the upload.

    Raises:
        KeyError: the AmpliSeq design lacks a required key.
        ValueError: a BED file named by the AmpliSeq design is not in the
            archive, or the upload matches no supported layout.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    log_upload = '/rundb/api/v1/contentupload/%s/' % str(args.upload_id)

    if zipfile.is_zipfile(args.upload_file):
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        files = [os.path.basename(args.upload_file[:-3])]
        # gzip writes straight into the target file; no shell is involved, so
        # characters in the uploaded file name cannot be interpreted as shell
        # syntax. As before, the exit status is not checked.
        with open(os.path.join(args.path, files[0]), 'wb') as unzipped:
            subprocess.call(['gzip', '-dc', args.upload_file], stdout=unzipped)
    else:
        files = [args.upload_file]

    # --- Establish the upload type ---
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    # The hotspot flag is compared with == (not truthiness) on purpose: a
    # value that is neither True nor False matches no single-file branch.
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + '.bed'
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, vcf_name),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            # TODO: is this just name or full path??
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload=log_upload, text=line.strip())

    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        # Close the plan file deterministically instead of leaking the handle.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload=log_upload,
                     text="Malformed AmpliSeq archive: missing json key " + str(err))
            raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload=log_upload,
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            # Report the hotspots file that is actually missing.
            api.post('log', upload=log_upload,
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    # register_bed_file receives meta, so the hotspot flag is set to match
    # the file being registered.
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        # Plan creation is best-effort here: a failure is printed and the
        # metadata is still saved below.
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)

            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)

    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
예제 #5
0
def main():
    """Unpack, validate and register an upload; create a plan for AmpliSeq.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file``. Progress and errors are
    printed to standard output.

    A zip ``upload_file`` is expanded with ``file_utils.unzip_archive``, a
    ``.gz`` file is decompressed into ``path`` with ``gzip -dc``, and
    anything else is used as-is. The upload type is then established:

    * a single ``.bed`` with ``hotspot`` equal to False: target regions BED;
    * a single ``.bed`` with ``hotspot`` equal to True: hotspots BED;
    * a single ``.vcf`` with ``hotspot`` equal to True: converted to a
      hotspots BED with ``tvcutils prepare_hotspots``;
    * a list containing ``plan.json``: AmpliSeq archive, whose design names
      the target regions and hotspots BED files.

    Each BED file is passed to ``validate`` and, when that returns a falsy
    path, to ``register_bed_file``. For AmpliSeq uploads a plan built by
    ``plan_json`` is posted to ``plannedexperiment``. Finally the metadata is
    written back to ``meta_file`` and patched onto the upload.

    Exits with status 1 (or the failing tool's return code) on unrecognized
    or malformed uploads and on decompression/conversion failure; re-raises
    any exception hit while creating the plan after printing it.

    Output uses single-argument ``print(...)`` calls, which produce the same
    text under Python 2 and Python 3.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    print("Uploaded file:  " + os.path.basename(args.upload_file))

    if zipfile.is_zipfile(args.upload_file):
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print("Compressed:     Yes (zip)")
    elif args.upload_file.endswith('.gz'):
        print("Compressed:     Yes (gzip)")
        files = [os.path.basename(args.upload_file[:-3])]
        # gzip writes straight into the target file and its diagnostics are
        # captured for printing. No shell is involved, so characters in the
        # uploaded file name cannot be interpreted as shell syntax. The file
        # is decompressed once; the earlier second run was redundant.
        with open(os.path.join(args.path, files[0]), 'wb') as unzipped:
            p = subprocess.Popen(['gzip', '-dc', args.upload_file],
                                 stdout=unzipped,
                                 stderr=subprocess.PIPE)
            gzip_messages = p.communicate()[1]
        print(gzip_messages)
        if p.returncode != 0:
            sys.exit(p.returncode)
    else:
        print("Compressed:     No")
        files = [args.upload_file]

    # --- Establish the upload type ---
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    # The hotspot flag is compared with == (not truthiness) on purpose: a
    # value that is neither True nor False matches no single-file branch.
    if len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get(
            'hotspot', False) == True:
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + '.bed'
        # Argument list instead of a shell string, for the same reason as the
        # gzip call above.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, vcf_name),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        print("Content:        AmpliSeq ZIP")
        print("")
        meta['is_ampliseq'] = True
        # Close the plan file deterministically instead of leaking the handle.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design, meta = ampliseq.handle_versioned_plans(
            plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Malformed AmpliSeq archive: missing json key " +
                  str(err))
            sys.exit(1)

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            sys.exit(1)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            # Report the hotspots file that is actually missing.
            print("ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            sys.exit(1)

    else:
        api.patch("contentupload",
                  args.upload_id,
                  status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')

    # register_bed_file receives meta, so the hotspot flag is set to match
    # the file being registered.
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" +
                    target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path,
                    meta["reference"] + "/unmerged/detail/" + hotspots_bed)

            plan_prototype = plan_json(meta, args.upload_id, primary_path,
                                       secondary_path)
            success, response, content = api.post("plannedexperiment",
                                                  **plan_prototype)
            if not success:
                api.patch("contentupload",
                          args.upload_id,
                          status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
예제 #6
0
파일: validate.py 프로젝트: stevematyas/TS
def main():
    """Validate an upload, register its BED files and post an AmpliSeq plan.

    Reads four positional command-line arguments: ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file``. The file list is taken from
    the ``pre_process_files`` entry of the metadata JSON.

    Upload types, checked in order:

    * a single ``.bed`` with ``hotspot`` equal to False: target regions BED;
    * a single ``.bed`` with ``hotspot`` equal to True: hotspots BED;
    * a single ``.vcf`` with ``hotspot`` equal to True: converted to a
      hotspots BED with ``tvcutils prepare_hotspots``;
    * a list containing ``plan.json``: AmpliSeq archive, checked by
      ``ampliseq.validate_ampliSeq_bundle``; the design may name target
      regions, hotspots and SSE BED files.

    Unless ``is_BED_encrypted(meta)`` is true, each BED file is passed to
    ``validate`` and, when that returns a falsy path, to
    ``register_bed_file``. AmpliSeq uploads are then handed to
    ``ampliseq.convert_AS_to_TS_plan_and_post``; other uploads only have
    their metadata stored with ``api.update_meta``.

    Returns early (``None``) when the AmpliSeq validation reports that a
    reference install is in progress. Exits with status 1 (or the
    converter's return code) on unrecognized uploads or conversion failure.

    Raises:
        RuntimeError: the AmpliSeq plan could not be created.

    Output uses single-argument ``print(...)`` calls, which produce the same
    text under Python 2 and Python 3.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing 'pre_process_files' entry is treated as "no files" so the
    # upload is reported as unrecognized instead of failing on len(None).
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta['is_ampliseq'] = False

    # The hotspot flag is compared with == (not truthiness) on purpose: a
    # value that is neither True nor False matches no single-file branch.
    if len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print("")

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get(
            'hotspot', False) == True:
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + '.bed'
        # The command is an argument list (no shell), so characters in the
        # uploaded file name cannot be interpreted as shell syntax.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, vcf_name),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        # Call the validation script from ampliseq: validate reference,
        # target bed and hotspot.
        print("Content:        AmpliSeq ZIP\n")

        meta['is_ampliseq'] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(
            meta, args)
        api.update_meta(meta, args)
        # If the reference mentioned in plan.json ("genome_reference") is not
        # installed in the TS:
        #   - wait for the reference to be installed
        #   - the subtask finish_me will be called at the end of the
        #     reference install
        #   - the validation of the upload is then restarted
        if isRefInstallInProgress:
            return

        ampliseq_plan = meta['design']['plan']
        target_regions_bed = ampliseq_plan.get('designed_bed', '')
        hotspots_bed = ampliseq_plan.get('hotspot_bed', '')
        sse_bed = ampliseq_plan.get('sse_bed', '')
    else:
        api.patch("contentupload",
                  args.upload_id,
                  status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""

    # For each BED kind: encrypted files only have their name blanked in the
    # design; otherwise the file is validated and, if validate() returns a
    # falsy path, registered with the matching flags set on meta.
    isBED_Encrypted = is_BED_encrypted(meta)
    if target_regions_bed:
        if isBED_Encrypted:
            meta['design']['plan']['designed_bed'] = ''
        else:
            target_regions_bed_path = validate(args.upload_id, args.path, meta,
                                               target_regions_bed,
                                               'target regions BED')
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(
                    args.upload_id, args.path, meta, target_regions_bed)

    if hotspots_bed:
        if isBED_Encrypted:
            meta['design']['plan']['hotspot_bed'] = ''
        else:
            hotspots_bed_path = validate(args.upload_id, args.path, meta,
                                         hotspots_bed, 'hotspots BED')
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id,
                                                      args.path, meta,
                                                      hotspots_bed)

    if sse_bed:
        if isBED_Encrypted:
            meta['design']['plan']['sse_bed'] = ''
        else:
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed,
                                    'SSE BED')
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path,
                                                 meta, sse_bed)

    if meta['is_ampliseq']:
        if isBED_Encrypted:
            run_type = meta['design']['plan'].get('runType', '')
            if run_type == "AMPS_RNA":
                meta['reference'] = None
                api.update_meta(meta, args)

        # Parse, process and convert the AmpliSeq plan.json to a TS-supported
        # plan and post it.
        _success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta, args, target_regions_bed_path, hotspots_bed_path,
            sse_bed_path)
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." % errMsg)
            # No exception is active here, so a bare ``raise`` would fail
            # with an unrelated error; raise one that carries the message.
            raise RuntimeError(
                "Could not create plan from this zip: %s." % errMsg)
    else:
        api.update_meta(meta, args)
예제 #7
0
def pre_process():
    """Classify an uploaded file and record the result in its metadata.

    Command-line arguments (parsed with argparse): ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file`` (opened in ``r+`` mode).

    The upload is unzipped into ``path`` when it is a zip archive. It is
    then handled as one of:

    * a single ``.bed`` file, which becomes ``meta['primary_bed']``;
    * a single ``.vcf`` file with a truthy ``meta['hotspot']``, which is
      converted to BED with ``tvcutils prepare_hotspots``; the converter
      output is posted line by line to the upload log;
    * an archive containing ``plan.json``, treated as an AmpliSeq export.

    The updated metadata is written back to ``meta_file`` and patched onto
    the ``contentupload`` record.

    Raises:
        ValueError: the upload matches none of the cases above.
        KeyError: the AmpliSeq plan lacks an expected key; the upload
            status is patched with an error message before re-raising.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    # .get(): a meta file without a 'hotspot' key should fall through to the
    # "unsupported upload" error below rather than die with a KeyError.
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot'):
        # convert vcf to bed
        target_filename = os.path.join(args.path, os.path.basename(
            files[0])) + '.bed'
        # TODO: is this just name or full path??
        reference_fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        # Argument list, no shell: the file name and reference come from the
        # upload, so they must never be interpreted by /bin/sh.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', files[0],
            '--output-bed', target_filename,
            '--reference', reference_fasta,
            '--filter-bypass', 'on',
        ]
        process = subprocess.Popen(convert_command,
                                   stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        # universal_newlines gives text output, so the log lines are strings
        # on Python 3 as well. The converter's exit status is not checked
        # here (unchanged from the original behaviour).
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' %
                     str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Context manager so the plan file handle is closed promptly.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file, parse_float=Decimal)
        _version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError(
            "Upload must be either valid Ampliseq export or contain a single BED file."
        )

    # Rewrite the meta file in place with the updated contents.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
예제 #8
0
파일: validate.py 프로젝트: biocyberman/TS
def main():
    """Validate an uploaded BED/VCF/AmpliSeq bundle and register its BED files.

    Command-line arguments (parsed with argparse): ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file`` (path to a JSON file).

    The file list is read from ``meta['pre_process_files']`` and handled as:

    * a single ``.bed`` file: target regions when ``meta['hotspot']`` is
      false or absent, hotspots when it is true;
    * a single ``.vcf`` file with ``meta['hotspot']`` true: converted to a
      hotspots BED with ``tvcutils prepare_hotspots``; the process exits
      with the converter's return code if that is non-zero;
    * a list containing ``plan.json``: an AmpliSeq ZIP, validated through
      ``ampliseq.validate_ampliSeq_bundle``;
    * anything else: the upload status is patched with an error and the
      process exits with status 1.

    Each BED file found is passed to ``validate`` and, when that returns a
    falsy value, to ``register_bed_file``. For AmpliSeq uploads the plan is
    then converted and posted; otherwise the metadata is saved with
    ``api.update_meta``.

    Raises:
        Exception: the AmpliSeq plan could not be created.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing or null 'pre_process_files' is treated as an empty list so it
    # is reported as an unrecognized upload instead of crashing in len().
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta['is_ampliseq'] = False

    # Explicit == comparisons are kept on purpose: a 'hotspot' value such as
    # None matches neither True nor False, exactly as before.
    hotspot_flag = meta.get('hotspot', False)
    is_single_file = len(files) == 1

    if is_single_file and files[0].endswith('.bed') and hotspot_flag == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print("")

    elif is_single_file and files[0].endswith('.bed') and hotspot_flag == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print("")

    elif is_single_file and files[0].endswith('.vcf') and hotspot_flag == True:
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + '.bed'
        # Argument list, no shell: the file name and reference come from the
        # upload, so they must never be interpreted by /bin/sh.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, vcf_name),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
                meta["reference"], meta["reference"]),
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        # Call the validation script from ampliseq. validate reference, target bed and hotspot
        print("Content:        AmpliSeq ZIP\n")

        meta['is_ampliseq'] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(meta, args)
        api.update_meta(meta, args)
        # If reference mentioned in the plan.json (get the info from
        # "genome_reference") is not installed in the TS,
        #   - wait for the ref to be installed
        #   - the subtask finish_me will be called at the end of the
        #     reference install
        #   - process to restart validation of the upload
        if isRefInstallInProgress:
            return

        plan = meta['design']['plan']
        target_regions_bed = plan.get('designed_bed', '')
        hotspots_bed = plan.get('hotspot_bed', '')
        sse_bed = plan.get('sse_bed', '')
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""

    isBED_Encrypted = is_BED_encrypted(meta)
    if target_regions_bed:
        if isBED_Encrypted:
            meta['design']['plan']['designed_bed'] = ''
        else:
            target_regions_bed_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(args.upload_id, args.path, meta, target_regions_bed)

    if hotspots_bed:
        if isBED_Encrypted:
            meta['design']['plan']['hotspot_bed'] = ''
        else:
            hotspots_bed_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if sse_bed:
        if isBED_Encrypted:
            meta['design']['plan']['sse_bed'] = ''
        else:
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed, 'SSE BED')
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path, meta, sse_bed)

    if meta['is_ampliseq']:
        if isBED_Encrypted:
            run_type = meta['design']['plan'].get('runType', '')
            if run_type == "AMPS_RNA":
                meta['reference'] = None
                api.update_meta(meta, args)

        # parse,process and convert the ampliseq plan.json to TS supported plan and post
        success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta,
            args,
            target_regions_bed_path,
            hotspots_bed_path,
            sse_bed_path
        )
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." % errMsg)
            # A bare `raise` here has no active exception to re-raise and
            # would fail with an unrelated error; raise explicitly instead.
            raise Exception("validation error")
    else:
        api.update_meta(meta, args)
예제 #9
0
파일: validate.py 프로젝트: zjwang6/TS
def main():
    """Validate an uploaded BED/VCF/AmpliSeq bundle and register its BED files.

    Command-line arguments (parsed with argparse): ``upload_id`` (int),
    ``path``, ``upload_file`` and ``meta_file`` (path to a JSON file).

    The file list is read from ``meta["pre_process_files"]`` and handled as:

    * a single ``.bed`` file: target regions when ``meta["hotspot"]`` is
      false or absent, hotspots when it is true;
    * a single ``.vcf`` file with ``meta["hotspot"]`` true: converted to a
      hotspots BED with ``tvcutils prepare_hotspots``; the process exits
      with the converter's return code if that is non-zero;
    * a list containing ``plan.json``: an AmpliSeq ZIP, validated through
      ``ampliseq.validate_ampliSeq_bundle``;
    * anything else: the upload status is patched with an error and the
      process exits with status 1.

    Each BED file found is passed to ``validate`` with its publisher type
    and, when that returns a falsy value, to ``register_bed_file``. For
    AmpliSeq uploads the plan is then converted and posted; otherwise the
    metadata is saved with ``api.update_meta``.

    Raises:
        Exception: the AmpliSeq plan could not be created.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument("upload_id", type=int)
    parse.add_argument("path")
    parse.add_argument("upload_file")
    parse.add_argument("meta_file")

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing or null "pre_process_files" is treated as an empty list so it
    # is reported as an unrecognized upload instead of crashing in len().
    files = meta.get("pre_process_files") or []

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta["is_ampliseq"] = False

    # Explicit == comparisons are kept on purpose: a "hotspot" value such as
    # None matches neither True nor False, exactly as before.
    hotspot_flag = meta.get("hotspot", False)
    is_single_file = len(files) == 1

    if is_single_file and files[0].endswith(".bed") and hotspot_flag == False:
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print()

    elif is_single_file and files[0].endswith(".bed") and hotspot_flag == True:
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print()

    elif is_single_file and files[0].endswith(".vcf") and hotspot_flag == True:
        print("Content:        Hotspots file in VCF format")
        print()
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print()

        vcf_name = os.path.basename(files[0])
        hotspots_bed = vcf_name + ".bed"
        # Argument list, no shell: the file name and reference come from the
        # upload, so they must never be interpreted by /bin/sh.
        convert_command = [
            "/usr/local/bin/tvcutils",
            "prepare_hotspots",
            "--input-vcf",
            os.path.join(args.path, vcf_name),
            "--output-bed",
            os.path.join(args.path, hotspots_bed),
            "--reference",
            "/results/referenceLibrary/tmap-f3/%s/%s.fasta" %
            (meta["reference"], meta["reference"]),
            "--filter-bypass",
            "on",
        ]

        # universal_newlines=True yields text, so the converter output is
        # printed as readable lines rather than as a bytes repr.
        p = subprocess.Popen(
            convert_command,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        # Call the validation script from ampliseq. validate reference, target bed and hotspot
        print("Content:        AmpliSeq ZIP\n")

        meta["is_ampliseq"] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(
            meta, args)
        api.update_meta(meta, args)
        # If reference mentioned in the plan.json (get the info from
        # "genome_reference") is not installed in the TS,
        #   - wait for the ref to be installed
        #   - the subtask finish_me will be called at the end of the
        #     reference install
        #   - process to restart validation of the upload
        if isRefInstallInProgress:
            return

        plan = meta["design"]["plan"]
        target_regions_bed = plan.get("designed_bed", "")
        hotspots_bed = plan.get("hotspot_bed", "")
        sse_bed = plan.get("sse_bed", "")
    else:
        api.patch("contentupload",
                  args.upload_id,
                  status="Error: Unrecognized upload type.")
        print()
        print(
            "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        )
        sys.exit(1)

    # === Validate and Register ===
    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""

    isBED_Encrypted = is_BED_encrypted(meta)
    if target_regions_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["designed_bed"] = ""
        else:
            bed_type = publisher_types.TARGET
            target_regions_bed_path = validate(args.upload_id, args.path, meta,
                                               target_regions_bed, bed_type)
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(
                    args.upload_id, args.path, meta, target_regions_bed,
                    bed_type)

    if hotspots_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["hotspot_bed"] = ""
        else:
            bed_type = publisher_types.HOTSPOT
            hotspots_bed_path = validate(args.upload_id, args.path, meta,
                                         hotspots_bed, bed_type)
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id,
                                                      args.path, meta,
                                                      hotspots_bed, bed_type)

    if sse_bed:
        if isBED_Encrypted:
            meta["design"]["plan"]["sse_bed"] = ""
        else:
            bed_type = publisher_types.SSE
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed,
                                    bed_type)
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path,
                                                 meta, sse_bed, bed_type)

    if meta["is_ampliseq"]:
        # parse,process and convert the ampliseq plan.json to TS supported plan and post
        success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta,
            args,
            target_regions_bed_path,
            hotspots_bed_path,
            sse_bed_path,
            isBED_Encrypted=isBED_Encrypted,
        )
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." %
                  str(errMsg))
            raise Exception("validation error")
        if success:
            print("Plan Template created successfully.")
    else:
        api.update_meta(meta, args)