Example #1
0
def deal_with_upload(meta, args):
    print("Dealing with the upload file")
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed:     Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed:     Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file,
                                     os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE,
                             shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        subprocess.call(cmd, shell=True)
    else:
        print "Compressed:     No"
        files = [args.upload_file]
    meta['pre_process_files'] = files
    api.update_meta(meta, args)
    return files
Example #2
0
def deal_with_upload(meta, args):
    print("Dealing with the upload file")
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed:     Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed:     Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd =  'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path,files[0]))
        p = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        subprocess.call(cmd, shell=True)
    else:
        print "Compressed:     No"
        files = [args.upload_file]
    meta['pre_process_files'] = files
    api.update_meta(meta, args)
    return files
Example #3
0
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file,'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()
    
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        files = [os.path.basename(args.upload_file[:-3])]
        cmd =  'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path,files[0]))
        subprocess.call(cmd, shell=True)
    else:
        files = [args.upload_file]

    ''' Establish the upload type '''
    
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot',False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot',False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot',False) == True:
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % os.path.join(args.path,os.path.basename(files[0]))
        convert_command += '  --output-bed %s' % os.path.join(args.path,hotspots_bed)
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (meta["reference"],meta["reference"]) #TODO: is this just name or full path??
        convert_command += '  --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file,'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")


    
    ''' === Validate and Register === '''
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+hotspots_bed)

            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)
    
    meta_file_handle = open(args.meta_file,'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
Example #4
0
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    print "Uploaded file:  " + os.path.basename(args.upload_file)

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed:     Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed:     Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file,
                                     os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE,
                             shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        subprocess.call(cmd, shell=True)
    else:
        print "Compressed:     No"
        files = [args.upload_file]
    ''' Establish the upload type '''

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Target regions file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Hotspots file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get(
            'hotspot', False) == True:
        print "Content:        Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print

        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % os.path.join(
            args.path, os.path.basename(files[0]))
        convert_command += '  --output-bed %s' % os.path.join(
            args.path, hotspots_bed)
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += '  --filter-bypass on'

        p = subprocess.Popen(convert_command,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE,
                             shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        #    api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        print "Content:        AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(
            plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(
                err)
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            #raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)

    else:
        api.patch("contentupload",
                  args.upload_id,
                  status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)
    ''' === Validate and Register === '''
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')

    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" +
                    target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path,
                    meta["reference"] + "/unmerged/detail/" + hotspots_bed)

            plan_prototype = plan_json(meta, args.upload_id, primary_path,
                                       secondary_path)
            success, response, content = api.post("plannedexperiment",
                                                  **plan_prototype)
            if not success:
                api.patch("contentupload",
                          args.upload_id,
                          status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)