Ejemplo n.º 1
0
def check_reference(meta, args):
    """Check and install the needed reference genome.

    Loads ``plan.json`` from ``args.path``, resolves the versioned plan via
    ``ampliseq.handle_versioned_plans``, and inspects the design's
    ``genome_reference`` entry.  If the genome (matched by its fasta md5,
    stored as ``identity_hash``) is not yet installed, a new
    ``ReferenceGenome`` record is created, the upload is parked in a
    "Waiting on reference" state, and an asynchronous download is started
    whose callback re-runs the publisher scripts on this upload.

    Returns True when a reference download was started (the caller should
    stop and wait), False when no download is needed.
    """
    print("Checking reference")
    plan_data = json.load(open(os.path.join(args.path, "plan.json")))
    version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
    print("Got versioned stuff")
    # If we have a genome reference, check to see if it's installed
    reference = design.get('genome_reference', None)
    print(reference)
    if not reference:
        return False
    try:
        url = reference.get('uri')
        ref_hash = reference.get('files_md5sum', {}).get('fasta')
        short_name = reference.get('short_name')
        name = reference.get('name')
        notes = reference.get('notes', "AmpliSeq Import")
        print("Got various fields")
    except KeyError as err:
        # If the key does not exist, that's fine, but it can't exist and be corrupt
        # NOTE(review): dict.get never raises KeyError, so this handler looks
        # unreachable as written -- confirm the intended failure mode.
        print("Corrupt genome_reference entry: {0}".format(err))
        sys.exit(1)

    # The identity_hash matching the files_md5sum.fasta hash determines whether
    # or not the genome is installed
    print("Checking reference " + ref_hash)
    if not models.ReferenceGenome.objects.filter(identity_hash=ref_hash).exists():
        # Register the genome as a disabled record in "downloading" state;
        # it is enabled by the install pipeline once the download completes.
        reference = models.ReferenceGenome(
            enabled = False,
            identity_hash = ref_hash,
            name = name,
            notes = notes,
            short_name = short_name,
            source = url,
            status = "downloading",
            index_version = "tmap-f3"
        )
        reference.save()
        print("created new reference")
        pub = models.Publisher.objects.get(name='BED')
        upload = models.ContentUpload.objects.get(pk=args.upload_id)
        # This is a celery subtask that will run the publisher scripts on this upload again
        finish_me = run_pub_scripts.si(pub, upload)
        print("About t set check point")
        set_checkpoint(meta, args)
        print("check point set")
        # With a status starting with "Waiting" the framework will stop
        # after pre_processing, before validate.
        upload.status = "Waiting on reference"
        upload.save()
        # the subtask finish_me will be called at the end of the reference install
        # process to restart validation of the upload
        start_reference_download(url, reference, callback=finish_me)
        print("Started reference download")
        return True
    print("exiting in shame")
    return False
Ejemplo n.º 2
0
def pre_process():
    """Classify an uploaded content file and record the result.

    Parses the standard publisher arguments, unpacks the upload if it is a
    zip archive, and classifies it as either a single BED file or an
    AmpliSeq export (identified by a contained ``plan.json``).  The
    classification is written back into the upload's meta file and pushed
    to the API.

    Raises ValueError for unrecognized upload shapes; exits non-zero if the
    meta file cannot be opened.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        # argparse.FileType raises IOError when meta_file cannot be opened
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file)
    # Reset the classification fields; they are filled in below.
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        # Plain single BED file upload
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Use a context manager so the plan file handle is closed promptly
        # (the original json.load(open(...)) leaked the handle).
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if 'reference' not in meta:
                meta['reference'] = plan['genome'].lower()
        except KeyError as err:
            # A missing key means the archive is malformed; flag and re-raise
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Rewrite the meta file in place with the updated classification
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file)
    api.patch("contentupload", args.upload_id, meta=meta)
Ejemplo n.º 3
0
def get_ampliseq_fixed_designs(user, password):
    """Fetch the active fixed template designs from the AmpliSeq service.

    Authenticates with the given credentials and requests the list of
    active template designs from ``settings.AMPLISEQ_URL``.

    Returns a ``(response, designs)`` pair: on HTTP 200, ``designs`` is a
    list of version-normalized design dicts; on any other status it is
    ``None``.
    """
    h = httplib2.Http(disable_ssl_certificate_validation=settings.DEBUG)
    h.add_credentials(user, password)
    url = urlparse.urljoin(settings.AMPLISEQ_URL, "ws/tmpldesign/list/active")
    response, content = h.request(url)
    if response['status'] != '200':
        return response, None
    designs = json.loads(content)
    # handle_versioned_plans returns (version, data, meta); keep data only
    fixed = [
        ampliseq.handle_versioned_plans(template)[1]
        for template in designs.get('TemplateDesigns', [])
    ]
    return response, fixed
Ejemplo n.º 4
0
def check_reference(meta, args):
    """Ensure the genome reference required by the plan is installed.

    Reads ``plan.json`` from ``args.path``, resolves the versioned design,
    and if its genome reference is not already installed (matched by the
    fasta md5), registers it and kicks off an asynchronous download; the
    upload is parked in "Waiting on reference" state until the download's
    callback re-runs the publisher scripts.

    Returns True when a download was started, False otherwise.
    """
    print("Checking reference")
    plan_path = os.path.join(args.path, "plan.json")
    plan_data = json.load(open(plan_path))
    version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
    print("Got versioned stuff")
    genome_ref = design.get('genome_reference', None)
    print(genome_ref)
    if not genome_ref:
        return False
    try:
        url = genome_ref.get('uri')
        ref_hash = genome_ref.get('files_md5sum', {}).get('fasta')
        short_name = genome_ref.get('short_name')
        name = genome_ref.get('name')
        notes = genome_ref.get('notes', "AmpliSeq Import")
        print("Got various fields")
    except KeyError as err:
        print("Corrupt genome_reference entry: {0}".format(err))
        sys.exit(1)
    print("Checking reference " + ref_hash)
    already_installed = models.ReferenceGenome.objects.filter(
        identity_hash=ref_hash).exists()
    if already_installed:
        print("exiting in shame")
        return False
    new_reference = models.ReferenceGenome(
        enabled=False,
        identity_hash=ref_hash,
        name=name,
        notes=notes,
        short_name=short_name,
        source=url,
        status="downloading",
        index_version="tmap-f3"
    )
    new_reference.save()
    print("created new reference")
    pub = models.Publisher.objects.get(name='BED')
    upload = models.ContentUpload.objects.get(pk=args.upload_id)
    # Celery subtask: re-runs the publisher scripts once the download ends
    finish_me = run_pub_scripts.si(pub, upload)
    print("About t set check point")
    set_checkpoint(meta, args)
    print("check point set")
    upload.status = "Waiting on reference"
    upload.save()
    start_reference_download(url, new_reference, callback=finish_me)
    print("Started reference download")
    return True
Ejemplo n.º 5
0
def get_ampliseq_designs(user, password):
    """Fetch the user's assay designs from the AmpliSeq service.

    Returns a ``(response, designs)`` pair.  On HTTP 200, ``designs`` is
    the list of assay designs with each design's ``DesignSolutions``
    replaced by its version-normalized data dicts; on any other status an
    empty dict is returned in place of the designs.
    """
    h = httplib2.Http(disable_ssl_certificate_validation=settings.DEBUG)
    h.add_credentials(user, password)
    url = urlparse.urljoin(settings.AMPLISEQ_URL, "ws/design/list")
    response, content = h.request(url)
    if response['status'] != '200':
        return response, {}
    design_data = json.loads(content)
    designs = design_data.get('AssayDesigns', [])
    for design in designs:
        # handle_versioned_plans returns (version, data, meta); keep data
        design['DesignSolutions'] = [
            ampliseq.handle_versioned_plans(solution)[1]
            for solution in design.get('DesignSolutions', [])
        ]
    return response, designs
Ejemplo n.º 6
0
def check_reference(meta, args):
    """Make sure the genome reference named in plan.json is installed.

    If the design names a genome reference whose fasta md5 is unknown to
    the database, a ReferenceGenome row is created in "downloading" state,
    the upload is put into a "Waiting on reference" status, and an
    asynchronous download is started whose callback re-runs the publisher
    scripts.  Returns True in that case, otherwise False.
    """
    print("Checking reference")
    plan_data = json.load(open(os.path.join(args.path, "plan.json")))
    version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
    print("Got versioned stuff")
    ref_entry = design.get('genome_reference', None)
    print(ref_entry)
    if not ref_entry:
        return False
    try:
        source_url = ref_entry.get('uri')
        fasta_md5 = ref_entry.get('files_md5sum', {}).get('fasta')
        ref_short_name = ref_entry.get('short_name')
        ref_name = ref_entry.get('name')
        ref_notes = ref_entry.get('notes', "AmpliSeq Import")
        print("Got various fields")
    except KeyError as err:
        print("Corrupt genome_reference entry: {0}".format(err))
        sys.exit(1)
    print("Checking reference " + fasta_md5)
    genome_known = models.ReferenceGenome.objects.filter(
        identity_hash=fasta_md5).exists()
    if not genome_known:
        new_genome = models.ReferenceGenome(enabled=False,
                                            identity_hash=fasta_md5,
                                            name=ref_name,
                                            notes=ref_notes,
                                            short_name=ref_short_name,
                                            source=source_url,
                                            status="downloading",
                                            index_version="tmap-f3")
        new_genome.save()
        print("created new reference")
        bed_publisher = models.Publisher.objects.get(name='BED')
        upload = models.ContentUpload.objects.get(pk=args.upload_id)
        # Subtask that resumes publisher processing after the download
        resume_task = run_pub_scripts.si(bed_publisher, upload)
        print("About t set check point")
        set_checkpoint(meta, args)
        print("check point set")
        upload.status = "Waiting on reference"
        upload.save()
        start_reference_download(source_url, new_genome, callback=resume_task)
        print("Started reference download")
        return True
    print("exiting in shame")
    return False
Ejemplo n.º 7
0
def pre_process():
    """Classify an uploaded file and record the result in its meta file.

    Handles three upload shapes: a single BED file, a single hotspot VCF
    (converted to BED via tvcutils), or an AmpliSeq export zip containing
    ``plan.json``.  Updates the meta JSON on disk (via the r+ file handle)
    and patches the upload record through the API.

    Raises ValueError for unrecognized upload shapes; exits non-zero if the
    meta file cannot be opened.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        # argparse.FileType raises IOError when meta_file cannot be opened
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    # Decimal preserves exact numeric values round-tripped through the meta
    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        # Plain single BED file upload
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta['hotspot']:
        # convert vcf to bed
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % files[0]
        convert_command += '  --output-bed %s' % target_filename
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += '  --filter-bypass on'
        # NOTE(review): shell=True with interpolated filenames -- paths with
        # shell metacharacters would break or be unsafe; confirm upstream
        # sanitization of uploaded names.
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        # Forward converter output, line by line, to the upload's log
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename

    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Context manager closes the plan file promptly (the original
        # json.load(open(...)) leaked the handle).
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file, parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError as err:
            # Missing key => malformed archive; flag through API and re-raise
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED file.")

    # Rewrite the meta file from the beginning with the updated contents
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
Ejemplo n.º 8
0
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    files = meta.get('pre_process_files')

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Target regions file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Hotspots file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content:        Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print

        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += '  --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += '  --filter-bypass on'

        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        # api.post('log', upload='/rundb/api/v1/contentupload/%s/' %
        # str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        print "Content:        AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)

        meta['design'] = design

        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key "+str(err)
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            #raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    ''' === Validate and Register === '''
    primary_path = None
    secondary_path = None

    if is_BED_encrypted(meta):
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

        meta["hotspot"] = False
        if target_regions_bed and not primary_path:
            register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
        if hotspots_bed:
            meta["hotspot"] = True
            if not secondary_path:
                register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not (is_BED_encrypted(meta)):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(
                        args.path, meta["reference"]+"/unmerged/detail/"+target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(
                        args.path, meta["reference"]+"/unmerged/detail/"+hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type and (run_type == "AMPS_RNA"):
                    meta['reference'] = None
            plan_prototype, alignmentargs_override = plan_json(
                meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)

            if not success:
                api.patch("contentupload", args.upload_id, status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for k in error_json:
                        for j in range(len(error_json[k])):
                            err_message = str(error_json[k][j])
                            err_message = err_message.replace('>', '>')
                            error_message_array.append(err_message)
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)
            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict[
                          "id"], alignmentargs=alignmentargs_override, thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise

    api.update_meta(meta, args)
Ejemplo n.º 9
0
def main():
    """Process an uploaded BED/VCF/AmpliSeq file and register its content.

    Reads the meta JSON, unpacks zip/gzip uploads, classifies the content
    (target-regions BED, hotspots BED, hotspot VCF converted to BED, or an
    AmpliSeq archive with plan.json), validates/registers BED files, and
    for AmpliSeq archives attempts to create a planned experiment through
    the API.  Finally rewrites the meta file and patches the upload record.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    # Decimal preserves exact numeric values from the meta JSON
    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        # Decompress into args.path; files holds the decompressed name
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path, files[0]))
        subprocess.call(cmd, shell=True)
    else:
        files = [args.upload_file]

    # === Establish the upload type ===
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        # Convert the hotspot VCF to BED with tvcutils, logging its output
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += '  --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += '  --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        # Context manager closes the plan file promptly (the original
        # json.load(open(...)) leaked the handle).
        with open(os.path.join(args.path, "plan.json")) as plan_file:
            plan_data = json.load(plan_file)
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            # Persist the enriched meta before validation begins
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id, status="Error: malformed AmpliSeq archive")
            # BUGFIX: report the hotspots file name (was target_regions_bed)
            api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)

    else:
        api.patch("contentupload", args.upload_id, status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")

    # === Validate and Register ===
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta, target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta, hotspots_bed, 'hotspots BED')

    # A falsy path from validate() means the file still needs registering
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            # Fall back to the canonical unmerged/detail locations
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(args.path, meta["reference"]+"/unmerged/detail/"+hotspots_bed)

            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            # Best-effort: plan creation failure does not abort the upload
            print("Could not create plan from this zip: %s" % err)

    # Rewrite the meta file with the final state and push it to the API
    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
Ejemplo n.º 10
0
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')

    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    print "Uploaded file:  " + os.path.basename(args.upload_file)

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed:     Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed:     Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file,
                                     os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE,
                             shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        subprocess.call(cmd, shell=True)
    else:
        print "Compressed:     No"
        files = [args.upload_file]
    ''' Establish the upload type '''

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Target regions file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.bed') and meta.get(
            'hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content:        Hotspots file in BED format"
        print

    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get(
            'hotspot', False) == True:
        print "Content:        Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print

        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % os.path.join(
            args.path, os.path.basename(files[0]))
        convert_command += '  --output-bed %s' % os.path.join(
            args.path, hotspots_bed)
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += '  --filter-bypass on'

        p = subprocess.Popen(convert_command,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE,
                             shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)

        #process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        #for line in process.communicate()[0].splitlines():
        #    api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text=line.strip())
        meta['is_ampliseq'] = False

    elif "plan.json" in files:
        print "Content:        AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(
            plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(
                err)
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id), text="Malformed AmpliSeq archive: missing json key "+str(err))
            #raise

        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)

        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
            #api.post('log', upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
            #         text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)
            #raise ValueError("Hotspots file %s not present in AmpliSeq archive" % target_regions_bed)

    else:
        api.patch("contentupload",
                  args.upload_id,
                  status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file."
        sys.exit(1)
    ''' === Validate and Register === '''
    primary_path = None
    secondary_path = None

    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')

    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" +
                    target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path,
                    meta["reference"] + "/unmerged/detail/" + hotspots_bed)

            plan_prototype = plan_json(meta, args.upload_id, primary_path,
                                       secondary_path)
            success, response, content = api.post("plannedexperiment",
                                                  **plan_prototype)
            if not success:
                api.patch("contentupload",
                          args.upload_id,
                          status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
Ejemplo n.º 11
0
def pre_process():
    """Classify an uploaded file and normalize its metadata.

    Parses CLI arguments (upload_id, path, upload_file, meta_file), decides
    whether the upload is a plain BED file, a hotspot VCF (converted to BED
    via tvcutils), or an AmpliSeq ZIP (plan.json), fills in ``meta``
    accordingly, then rewrites the meta file in place and mirrors the
    metadata onto the contentupload API record.

    Exits non-zero on argument errors or a failed VCF conversion; raises
    ValueError for unrecognized upload types and re-raises KeyError for
    malformed AmpliSeq archives after flagging the upload via the API.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    # 'r+' so the same handle can be read now and rewritten at the end.
    parse.add_argument('meta_file', type=argparse.FileType('r+'))

    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    # parse_float=Decimal keeps numeric values exact across the
    # read/modify/write round trip of the meta file.
    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    if zipfile.is_zipfile(args.upload_file):
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        # Plain single-BED upload: register as-is, nothing to convert.
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot'):
        # Hotspot VCF upload: convert it to BED with tvcutils first.
        # .get() instead of ['hotspot'] so a meta file without the key falls
        # through to the explicit "unrecognized upload" error below.
        target_filename = os.path.join(args.path, os.path.basename(
            files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += '  --input-vcf %s' % files[0]
        convert_command += '  --output-bed %s' % target_filename
        # NOTE(review): assumes meta["reference"] is the genome short name,
        # not a full path -- TODO confirm (original comment had the same doubt).
        convert_command += '  --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += '  --filter-bypass on'
        process = subprocess.Popen(convert_command,
                                   stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE,
                                   shell=True)
        # Mirror tvcutils output line-by-line into the upload's API log.
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' %
                     str(args.upload_id),
                     text=line.strip())
        if process.returncode != 0:
            # Conversion failed: bail out rather than registering a BED file
            # that was never produced (matches the sibling process() branch).
            sys.exit(process.returncode)
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename

    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        # Context manager so the plan.json handle is always closed.
        with open(os.path.join(args.path, "plan.json")) as plan_handle:
            plan_data = json.load(plan_handle, parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError:
            # Flag the upload as errored, then surface the original error.
            api.patch("contentupload",
                      args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError(
            "Upload must be either valid Ampliseq export or contain a single BED file."
        )

    # Rewrite the meta file in place and mirror it onto the API record.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)