Esempio n. 1
0
def pre_process():
    """Classify an upload as a plain BED file or an AmpliSeq archive.

    Command-line arguments (read from ``sys.argv`` via argparse):
        upload_id:   integer id of the ``contentupload`` record to patch.
        path:        directory where archives are unpacked and where
                     ``plan.json`` is looked up.
        upload_file: the uploaded file (a ZIP archive or a single file).
        meta_file:   JSON metadata file, opened read/write and rewritten
                     in place with the updated metadata.

    The metadata gains ``is_ampliseq`` and ``primary_bed``; for AmpliSeq
    archives it also gains ``design``, ``secondary_bed`` and, when absent,
    ``reference``.  The result is written back to ``meta_file`` and sent to
    the ``contentupload`` record.

    Raises:
        KeyError: the AmpliSeq plan lacks an expected key (the upload status
            is patched to an error message first).
        ValueError: the upload is neither a single BED file nor an archive
            containing ``plan.json``.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file)
    # NOTE(review): "hotspot_bed" is reset here, but the AmpliSeq branch
    # below stores the hotspot file under "secondary_bed" -- confirm which
    # key downstream consumers actually read.
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Use a context manager so the plan file handle is always released.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if 'reference' not in meta:
                meta['reference'] = plan['genome'].lower()
        except KeyError:
            # Record the failure on the upload before propagating it.
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Rewrite the metadata file in place: the read left the position at the
    # end, so truncate and rewind before dumping.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file)
    # Push the buffered JSON to disk before announcing the new metadata.
    args.meta_file.flush()
    api.patch("contentupload", args.upload_id, meta=meta)
Esempio n. 2
0
def pre_process():
    """Classify an upload as BED, hotspot VCF or AmpliSeq archive.

    Command-line arguments (read from ``sys.argv`` via argparse):
        upload_id:   integer id of the ``contentupload`` record to patch.
        path:        directory where archives are unpacked, where the
                     converted BED is written and where ``plan.json`` is
                     looked up.
        upload_file: the uploaded file (a ZIP archive or a single file).
        meta_file:   JSON metadata file, opened read/write and rewritten
                     in place with the updated metadata.

    A single ``.vcf`` upload flagged as ``hotspot`` in the metadata is
    converted to BED with ``tvcutils prepare_hotspots``; the tool's output
    is posted line by line to the upload log.

    Raises:
        KeyError: the AmpliSeq design lacks an expected key (the upload
            status is patched to an error message first), or the metadata
            has no ``reference`` when a VCF must be converted.
        ValueError: the upload matches none of the supported layouts.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    is_single = len(files) == 1

    if is_single and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif is_single and files[0].endswith('.vcf') and meta.get('hotspot'):
        # A missing "hotspot" key is treated as "not a hotspot upload", so
        # the caller gets the descriptive ValueError below instead of a
        # bare KeyError.
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        # TODO: is the reference just a name or a full path?
        reference_fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        # Argument list with the default shell=False: upload-derived file
        # names are never interpreted by a shell.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', files[0],
            '--output-bed', target_filename,
            '--reference', reference_fasta,
            '--filter-bypass', 'on',
        ]
        process = subprocess.Popen(
            convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        log_target = '/rundb/api/v1/contentupload/%s/' % str(args.upload_id)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload=log_target, text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Use a context manager so the plan file handle is always released.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file, parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError:
            # Record the failure on the upload before propagating it.
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Rewrite the metadata file in place: the read left the position at the
    # end, so truncate and rewind before dumping.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    # Push the buffered JSON to disk before announcing the new metadata.
    args.meta_file.flush()
    api.patch("contentupload", args.upload_id, meta=meta)
Esempio n. 3
0
def main():
    """Classify a pre-processed upload, validate/register its BED files and,
    for AmpliSeq archives, create a planned experiment.

    Command-line arguments (read from ``sys.argv`` via argparse):
        upload_id:   integer id of the ``contentupload`` record to patch.
        path:        working directory holding the uploaded/unpacked files.
        upload_file: the uploaded file (parsed but not read here).
        meta_file:   path of the JSON metadata file; its
                     ``pre_process_files`` entry lists the files to inspect.

    The function reports progress on stdout and terminates the process
    with ``sys.exit`` on unrecognised uploads, malformed AmpliSeq archives
    or a failing VCF conversion.  A failure to create the plan is printed
    and re-raised.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing/None file list is treated as empty so it is reported as an
    # unrecognised upload instead of crashing on len(None).
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    is_single = len(files) == 1
    # Compared with == (not truthiness) on purpose: a value that is neither
    # True nor False must match none of the single-file branches.
    hotspot = meta.get('hotspot', False)

    if is_single and files[0].endswith('.bed') and hotspot == False:  # noqa: E712
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content:        Target regions file in BED format")
        print("")

    elif is_single and files[0].endswith('.bed') and hotspot == True:  # noqa: E712
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content:        Hotspots file in BED format")
        print("")

    elif is_single and files[0].endswith('.vcf') and hotspot == True:  # noqa: E712
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        hotspots_bed = os.path.basename(files[0]) + '.bed'
        reference_fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        # Argument list with the default shell=False: upload-derived file
        # names are never interpreted by a shell.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, os.path.basename(files[0])),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', reference_fasta,
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        print("Content:        AmpliSeq ZIP")
        print("")
        meta['is_ampliseq'] = True
        # Use a context manager so the plan file handle is always released.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)

        meta['design'] = design

        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print("ERROR: Malformed AmpliSeq archive: missing json key " + str(err))
            sys.exit(1)

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print("ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            sys.exit(1)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            # Report the hotspots file name (the original printed the
            # target-regions name here by mistake).
            print("ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            sys.exit(1)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    if is_BED_encrypted(meta):
        # Encrypted BED files are neither validated nor registered; their
        # names are blanked in the design and empty paths are used.
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        # validate() presumably returns the path of an already-known file
        # (falsy otherwise) -- TODO confirm; registration only happens when
        # it returned something falsy.
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

        # register_bed_file() receives meta, so "hotspot" is set to describe
        # the file being registered.
        meta["hotspot"] = False
        if target_regions_bed and not primary_path:
            register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
        if hotspots_bed:
            meta["hotspot"] = True
            if not secondary_path:
                register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not (is_BED_encrypted(meta)):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(
                        args.path, meta["reference"]+"/unmerged/detail/"+target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(
                        args.path, meta["reference"]+"/unmerged/detail/"+hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type and (run_type == "AMPS_RNA"):
                    meta['reference'] = None
            plan_prototype, alignmentargs_override = plan_json(
                meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)

            if not success:
                api.patch("contentupload", args.upload_id, status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    # The slice strips a fixed-size wrapper around the
                    # embedded JSON error document.
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for k in error_json:
                        for item in error_json[k]:
                            err_message = str(item)
                            # NOTE(review): as written this replace is a
                            # no-op; it presumably unescaped an HTML entity
                            # such as "&gt;" before the text was mangled --
                            # confirm against the upstream source.
                            err_message = err_message.replace('>', '>')
                            error_message_array.append(err_message)
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)
            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict[
                          "id"], alignmentargs=alignmentargs_override, thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise

    api.update_meta(meta, args)
Esempio n. 4
0
def main():
    """Unpack an upload, classify it, validate/register its BED files and,
    for AmpliSeq archives, post a planned experiment.

    Command-line arguments (read from ``sys.argv`` via argparse):
        upload_id:   integer id of the ``contentupload`` record to patch.
        path:        working directory for unpacked/converted files.
        upload_file: the uploaded file: a ZIP archive, a ``.gz`` file or a
                     single plain file.
        meta_file:   path of the JSON metadata file, rewritten with the
                     updated metadata.

    Raises:
        KeyError: the AmpliSeq design lacks an expected key.
        ValueError: a BED file named by the AmpliSeq design is missing from
            the archive, or the upload matches no supported layout.
        In each case the upload status is patched to an error first.

    A failure while creating the plan is only printed; the metadata is
    still written back afterwards.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    log_target = '/rundb/api/v1/contentupload/%s/' % str(args.upload_id)

    if zipfile.is_zipfile(args.upload_file):
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        files = [os.path.basename(args.upload_file[:-3])]
        # Decompress without a shell: gzip's stdout is redirected straight
        # into the target file, so the upload-derived names are never
        # parsed as shell syntax.  The exit status is ignored, as before.
        with open(os.path.join(args.path, files[0]), 'wb') as unpacked:
            subprocess.call(['gzip', '-dc', args.upload_file], stdout=unpacked)
    else:
        files = [args.upload_file]

    # --- Establish the upload type ---

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    is_single = len(files) == 1
    # Compared with == (not truthiness) on purpose: a value that is neither
    # True nor False must match none of the single-file branches.
    hotspot = meta.get('hotspot', False)

    if is_single and files[0].endswith('.bed') and hotspot == False:  # noqa: E712
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False

    elif is_single and files[0].endswith('.bed') and hotspot == True:  # noqa: E712
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False

    elif is_single and files[0].endswith('.vcf') and hotspot == True:  # noqa: E712
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        # TODO: is the reference just a name or a full path?
        reference_fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        # Argument list with the default shell=False (see gzip note above).
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, os.path.basename(files[0])),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', reference_fasta,
            '--filter-bypass', 'on',
        ]
        process = subprocess.Popen(
            convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload=log_target, text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        # Use a context manager so the plan file handle is always released.
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload=log_target, text="Malformed AmpliSeq archive: missing json key "+str(err))
            raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload=log_target,
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            # Report the hotspots file name (the original interpolated the
            # target-regions name into both messages by mistake).
            api.post('log', upload=log_target,
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    # validate() presumably returns the path of an already-known file (falsy
    # otherwise) -- TODO confirm; registration only happens when it returned
    # something falsy.
    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    # register_bed_file() receives meta, so "hotspot" is set to describe the
    # file being registered.
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        # Plan creation is best-effort: any failure is reported on stdout
        # and the metadata is still saved below.
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+hotspots_bed)

            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)

    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
Esempio n. 5
0
def main():
    """Classify a pre-processed upload, validate/register its BED files and,
    for AmpliSeq archives, convert and post the plan.

    Command-line arguments (read from ``sys.argv`` via argparse):
        upload_id:   integer id of the ``contentupload`` record to patch.
        path:        working directory holding the uploaded/unpacked files.
        upload_file: the uploaded file (parsed but not read here).
        meta_file:   path of the JSON metadata file; its
                     ``pre_process_files`` entry lists the files to inspect.

    The function reports progress on stdout, terminates the process with
    ``sys.exit`` on unrecognised uploads or a failing VCF conversion, and
    returns early when the AmpliSeq validation reports that a reference
    install is still in progress.

    Raises:
        RuntimeError: the AmpliSeq plan could not be created.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # A missing/None file list is treated as empty so it is reported as an
    # unrecognised upload instead of crashing on len(None).
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    sse_bed = None
    meta['is_ampliseq'] = False

    is_single = len(files) == 1
    # Compared with == (not truthiness) on purpose: a value that is neither
    # True nor False must match none of the single-file branches.
    hotspot = meta.get('hotspot', False)

    if is_single and files[0].endswith('.bed') and hotspot == False:  # noqa: E712
        target_regions_bed = os.path.basename(files[0])
        print("Content:        Target regions file in BED format")
        print("")

    elif is_single and files[0].endswith('.bed') and hotspot == True:  # noqa: E712
        hotspots_bed = os.path.basename(files[0])
        print("Content:        Hotspots file in BED format")
        print("")

    elif is_single and files[0].endswith('.vcf') and hotspot == True:  # noqa: E712
        print("Content:        Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")

        hotspots_bed = os.path.basename(files[0]) + '.bed'
        reference_fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        # Argument list with the default shell=False: upload-derived file
        # names are never interpreted by a shell.
        convert_command = [
            '/usr/local/bin/tvcutils', 'prepare_hotspots',
            '--input-vcf', os.path.join(args.path, os.path.basename(files[0])),
            '--output-bed', os.path.join(args.path, hotspots_bed),
            '--reference', reference_fasta,
            '--filter-bypass', 'on',
        ]

        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)

    elif "plan.json" in files:
        # Call the validation script from ampliseq: validate reference,
        # target bed and hotspot.
        print("Content:        AmpliSeq ZIP\n")

        meta['is_ampliseq'] = True
        isRefInstallInProgress, meta = ampliseq.validate_ampliSeq_bundle(meta, args)
        api.update_meta(meta, args)
        # If the reference mentioned in plan.json ("genome_reference") is
        # not installed in the TS:
        #   - wait for the reference to be installed
        #   - the subtask finish_me will be called at the end of the
        #     reference install
        #   - processing restarts validation of the upload
        if isRefInstallInProgress:
            return

        target_regions_bed = meta['design']['plan'].get('designed_bed', '')
        hotspots_bed = meta['design']['plan'].get('hotspot_bed', '')
        sse_bed = meta['design']['plan'].get('sse_bed', '')
    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    target_regions_bed_path = ""
    hotspots_bed_path = ""
    sse_bed_path = ""

    # For each kind of BED file: an encrypted bundle only has its name
    # blanked in the design; otherwise the file is validated and, when
    # validate() returns something falsy, registered.  The meta flags set
    # just before register_bed_file() describe the file being registered.
    isBED_Encrypted = is_BED_encrypted(meta)
    if target_regions_bed:
        if isBED_Encrypted:
            meta['design']['plan']['designed_bed'] = ''
        else:
            target_regions_bed_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
            if not target_regions_bed_path:
                meta["hotspot"] = False
                target_regions_bed_path = register_bed_file(args.upload_id, args.path, meta, target_regions_bed)

    if hotspots_bed:
        if isBED_Encrypted:
            meta['design']['plan']['hotspot_bed'] = ''
        else:
            hotspots_bed_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')
            if not hotspots_bed_path:
                meta["hotspot"] = True
                hotspots_bed_path = register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if sse_bed:
        if isBED_Encrypted:
            meta['design']['plan']['sse_bed'] = ''
        else:
            sse_bed_path = validate(args.upload_id, args.path, meta, sse_bed, 'SSE BED')
            if not sse_bed_path:
                meta["hotspot"] = False
                meta["sse"] = True
                meta["sse_target_region_file"] = target_regions_bed_path
                sse_bed_path = register_bed_file(args.upload_id, args.path, meta, sse_bed)

    if meta['is_ampliseq']:
        if isBED_Encrypted:
            run_type = meta['design']['plan'].get('runType', '')
            if run_type == "AMPS_RNA":
                meta['reference'] = None
                api.update_meta(meta, args)

        # Parse, process and convert the AmpliSeq plan.json to a TS
        # supported plan and post it.
        success, isUploadFailed, errMsg = ampliseq.convert_AS_to_TS_plan_and_post(
            meta,
            args,
            target_regions_bed_path,
            hotspots_bed_path,
            sse_bed_path
        )
        if isUploadFailed:
            print("ERROR: Could not create plan from this zip: %s." % errMsg)
            # The original used a bare ``raise`` here with no active
            # exception, which fails with an unrelated "no active exception"
            # error; raise an explicit error carrying the real message.
            raise RuntimeError("Could not create plan from this zip: %s." % errMsg)
    else:
        api.update_meta(meta, args)