Beispiel #1
0
def exp_kwargs(d, folder):
    """Convert the output of `parse_log` into the dictionary of keyword
    arguments needed to create an ``Experiment`` database object.

    Besides building the dictionary, this function reads and writes the
    database:

    * It looks up the plan referenced by the explog, first by
      ``planned_run_guid`` and otherwise by the plan short id. A plan that is
      found is saved with the experiment name (and, when found by GUID,
      marked as executed if it was not already).
    * When no plan is found, and neither an ``Experiment`` nor a
      ``PlannedExperiment`` with this experiment name exists, the system
      default plan template is cloned (together with its QC thresholds) and
      the clone is used as the plan.
    * It reads the default forward library key and 3' adapters to fill in
      ``libraryKey`` and ``forward3primeadapter``.

    :param d: dictionary produced by `parse_log` (explog key/value pairs).
    :param folder: experiment folder; stored as ``expDir`` and ``unique``.
    :returns: a ``(ret, ok)`` tuple. ``ret`` is the keyword-argument dict.
        ``ok`` is ``False`` when the run is a reverse (paired-end) run, or
        when the default forward library key / 3' adapter lookup fails or
        any other exception is raised in that section; in those cases ``ret``
        is returned as built so far, without the final field-width
        truncation. ``ok`` is ``True`` otherwise.
    """
    # Keys copied from the explog dict under the same name.
    identical_fields = ("sample", "library", "cycles", "flows",)
    # (explog key, Experiment kwarg name) pairs copied with a rename.
    simple_maps = (
        ("experiment_name", "expName"),
        ("chiptype", "chipType"),
        ("chipbarcode", "chipBarcode"),
        ("user_notes", "notes"),
        ##("seqbarcode", "seqKitBarcode"),
        ("autoanalyze", "autoAnalyze"),
        ("prebeadfind", "usePreBeadfind"),
        ##("librarykeysequence", "libraryKey"),
        ("barcodeid", "barcodeId"),
        ("isReverseRun", "isReverseRun"),
    )
    # (Experiment kwarg name, computed value) pairs. Note that the values are
    # evaluated right here, so extract_rig() and the GlobalConfig query run
    # unconditionally.
    full_maps = (
        ("pgmName", d.get('devicename', extract_rig(folder))),
        ("log", json.dumps(d, indent=4)),
        ("expDir", folder),
        ("unique", folder),
        ("baselineRun", d.get("runtype") == "STD" or d.get("runtype") == "Standard"),
        ("date", folder_mtime(folder)),
        ("storage_options", models.GlobalConfig.objects.all()[0].default_storage_options),
        ("flowsInOrder", getFlowOrder(d.get("image_map", ""))),
        ("reverse_primer", d.get('reverse_primer', 'Ion Kit')),
    )

    # Attributes that start out blank and are filled in further down.
    derive_attribute_list = ["libraryKey", "reverselibrarykey", "forward3primeadapter",
                             "reverse3primeadapter", "sequencekitname", "seqKitBarcode",
                             "sequencekitbarcode", "librarykitname", "librarykitbarcode",
                             "runMode"]

    ret = {}
    for f in identical_fields:
        ret[f] = d.get(f, '')
    for k1, k2 in simple_maps:
        ret[k2] = d.get(k1, '')
    for k, v in full_maps:
        ret[k] = v

    for attribute in derive_attribute_list:
        ret[attribute] = ''

    # N.B. this field is not used
    ret['storageHost'] = 'localhost'

    # If Flows keyword is defined in explog.txt...
    if ret['flows'] != "":
        # Cycles should be based on number of flows, not cycles published in log file
        # (Use of Cycles is deprecated in any case! We should be able to enter a random number here)
        ret['cycles'] = int(int(ret['flows']) / len(ret['flowsInOrder']))
    else:
        # ...if Flows is not defined in explog.txt:  (Very-old-dataset support)
        ret['flows'] = len(ret['flowsInOrder']) * int(ret['cycles'])
        logger.errors.warn("Flows keyword missing: Calculated Flows is %d" % int(ret['flows']))

    # The explog may carry the literal string "none" (any case) for the
    # barcode id; store that as blank.
    if ret['barcodeId'].lower() == 'none':
        ret['barcodeId'] = ''

    # A non-empty 'blocks' list marks a tiled (block) run; auto-analysis is
    # then enabled only if at least one non-thumbnail block requests it.
    if len(d.get('blocks', [])) > 0:
        ret['rawdatastyle'] = 'tiled'
        ret['autoAnalyze'] = False
        for bs in d['blocks']:
            # Hack alert.  Watch how explogparser.parse_log munges these strings when detecting which one is the thumbnail entry
            # Only thumbnail will have 0,0 as first and second element of the string.
            if '0' in bs.split(',')[0] and '0' in bs.split(',')[1]:
                continue
            if auto_analyze_block(bs):
                ret['autoAnalyze'] = True
                logger.errors.debug("Block Run. Detected at least one block to auto-run analysis")
                break
        if ret['autoAnalyze'] is False:
            logger.errors.debug("Block Run. auto-run whole chip has not been specified")
    else:
        ret['rawdatastyle'] = 'single'

    planShortId = d.get("planned_run_short_id", '')

    # fix [TS-3064] for PGM backward compatibility: fall back to the older
    # "pending_run_short_id" key when the planned-run key is blank.
    if (planShortId is None or len(planShortId) == 0):
        planShortId = d.get("pending_run_short_id", '')

    selectedPlanGUId = d.get("planned_run_guid", '')

    logger.errors.debug("...planShortId=%s; selectedPlanGUId=%s" % (planShortId, selectedPlanGUId))
    # NOTE(review): these are Python 2 print statements.
    print 'crawler: plannedRunShortId=', planShortId
    print 'crawler: plannedRunGUId=', selectedPlanGUId

    # Kit fields: the value "NOT_SCANNED" is treated as "no value" and leaves
    # the blank default in place.
    sequencingKitName = d.get("seqkitname", '')
    if sequencingKitName != "NOT_SCANNED":
        ret['sequencekitname'] = sequencingKitName

    # in rundb_experiment, there are 2 attributes for sequencingKitBarcode!!
    sequencingKitBarcode = d.get("seqkitpart", '')
    if sequencingKitBarcode != "NOT_SCANNED":
        ret['seqKitBarcode'] = sequencingKitBarcode
        ret['sequencekitbarcode'] = sequencingKitBarcode

    libraryKitName = d.get('libkit', '')
    if libraryKitName != "NOT_SCANNED":
        ret['librarykitname'] = libraryKitName

    libraryKitBarcode = d.get("libbarcode", '')
    if libraryKitBarcode != "NOT_SCANNED":
        ret['librarykitbarcode'] = libraryKitBarcode

    ## note: if PGM is running the old version, there is no isReverseRun in explog.txt.
    isReverseRun = d.get("isreverserun", '')
    if isReverseRun == "Yes":
        ret['isReverseRun'] = True
        ret['reverselibrarykey'] = ''
        ret["runMode"] = "pe"

        # Early exit: the explog itself declares a reverse run, so no plan
        # lookup is attempted.
        logger.errors.warn("PAIRED-END is NO LONGER SUPPORTED. Skipping experiment %s" % ret['expName'])
        return ret, False

    # Rules for applying the library key overrides:
    # 1) If plan is used and library key is specified, use that value
    # 2) Otherwise, if user has specified one on PGM's advanced page
    #    Validation required:
    #    Why: It could be left over from a previous run and not be compatible with the current run
    #    How: It has to be pre-defined in db and be in the direction of the new run.
    #    What: If it passes validation, use it
    # 3) Otherwise, use system default for that direction
    # 4) If plan is NOT used, and user has specified one in PGM's advanced page, do validation as above
    # 5) If it passes validation, use it
    # 6) Otherwise, use system default for that direction as defined in db
    # 7) If the system default somehow has no value, we'll use the library key from
    #    PGM's advanced setup page
    isPlanFound = False
    planObj = None
    if selectedPlanGUId:
        # Preferred lookup: by plan GUID.
        try:
            planObj = models.PlannedExperiment.objects.get(planGUID=selectedPlanGUId)
            isPlanFound = True
            ret["runMode"] = planObj.runMode

            expName = d.get("experiment_name", '')
            # fix TS-4714: fail-safe measure, mark the plan executed if instrument somehow does not mark it as executed
            if (not planObj.planExecuted):
                logger.errors.warn("REPAIR: marking plan %s as executed for experiment %s" % (planObj.planGUID, expName))
                planObj.planExecuted = True

            planObj.expName = expName
            planObj.save()

        except models.PlannedExperiment.DoesNotExist:
            logger.errors.warn("No plan with GUId %s found in database " % selectedPlanGUId)
        except models.PlannedExperiment.MultipleObjectsReturned:
            logger.errors.warn("Multiple plan with GUId %s found in database " % selectedPlanGUId)
    else:
        # Fallback lookup: by short id, only among plans already marked executed.
        if (planShortId and len(planShortId) > 0):
            try:
                # PGM should have set the plan as executed already
                # note: if instrument does not include plan GUID for the plan and does not mark the plan as executed,
                # crawler will not do any repair (to mark a plan as executed) and actually IndexError will likely happen
                planObj = models.PlannedExperiment.objects.filter(planShortID=planShortId, planExecuted=True).order_by("-date")[0]
                isPlanFound = True
                ret["runMode"] = planObj.runMode

                planObj.expName = d.get("experiment_name", '')
                planObj.save()

                logger.errors.debug("...planShortId=%s is for runMode=%s; reverse=%s" % (planShortId, planObj.runMode, planObj.isReverseRun))
            except IndexError:
                logger.errors.warn("No plan with short id %s found in database " % planShortId)

    if not isPlanFound:
        # if user does not use a plan for the run, fetch the system default plan template, and clone it for this run

        isNeedSystemDefaultTemplate = False
        experiment = None

        # if we have already saved the experiment to db, and it is using the cloned system default plan, explog will
        # not know that
        try:
            experiment = models.Experiment.objects.get(expName=d.get("experiment_name", ''))

            logger.errors.debug("crawler experiment already exists!! name=%s" % (d.get("experiment_name", '')))

        except models.Experiment.DoesNotExist:
            logger.errors.warn("expName: %s not yet in database and may need a sys default plan" % d.get("experiment_name", ''))

            # fix TS-4713 if a system default has somehow been cloned for this run, don't bother to clone again
            try:
                sysDefaultClones = models.PlannedExperiment.objects.filter(expName=d.get("experiment_name", ''))

                if sysDefaultClones:
                    #logger.errors.debug("SKIP cloning system default plan for %s since one already exists " % (d.get("experiment_name", '')))
                    isNeedSystemDefaultTemplate = False
                else:
                    isNeedSystemDefaultTemplate = True
            except:
                # Any failure while checking for existing clones is logged and
                # treated as "do not clone".
                logger.errors.warn(traceback.format_exc())
                isNeedSystemDefaultTemplate = False
        except models.Experiment.MultipleObjectsReturned:
            # this should not happen since the instrument assigns unique run names. But if it happens, don't bother to apply the
            # system default plan template to the experiment
            logger.errors.warn("multiple expName: %s found in database" % d.get("experiment_name", ''))
            isNeedSystemDefaultTemplate = False

        if isNeedSystemDefaultTemplate is True:
            try:
                # Only the first three characters of the chip type are used to
                # match a chip-specific template.
                explogChipType = ret['chipType'][:3]

                # Generic template (no chip type); an IndexError here means no
                # system default template exists at all.
                systemDefaultPlanTemplate = models.PlannedExperiment.objects.filter(isReusable=True, isSystem=True, isSystemDefault=True, chipType = None).order_by("-date")[0]

                try:
                    if explogChipType:
                        chipSystemDefaultPlanTemplate = models.PlannedExperiment.objects.filter(isReusable=True, isSystem=True, isSystemDefault=True, chipType=explogChipType).order_by("-date")[0]
                        systemDefaultPlanTemplate = chipSystemDefaultPlanTemplate
                        logger.errors.debug("Chip-specific system default plan template found in database for chip=%s; experiment=%s" % (explogChipType, d.get("experiment_name", '')))
                except IndexError:
                    logger.errors.debug("No chip-specific system default plan template found in database for chip=%s; experiment=%s. Going to use generic system default template instead." % (explogChipType, d.get("experiment_name", '')))

                # Shallow-copy the template and clear its primary key so that
                # save() stores the copy as a new plan.
                planObj = copy.copy(systemDefaultPlanTemplate)
                planObj.pk = None
                planObj.planGUID = None
                planObj.planShortID = None
                planObj.isReusable = False
                planObj.isSystem = False
                planObj.isSystemDefault = False

                # fix TS-4664: include experiment name to system default clone
                expName = d.get("experiment_name", '')
                planObj.planName = "CopyOfSystemDefault_" + expName
                planObj.expName = expName

                planObj.planExecuted = True

                if not planObj.chipType:
                    planObj.chipType = ""

                planObj.save()

                # clone the qc thresholds as well
                qcValues = systemDefaultPlanTemplate.plannedexperimentqc_set.all()

                for qcValue in qcValues:
                    qcObj = copy.copy(qcValue)

                    qcObj.pk = None
                    qcObj.plannedExperiment = planObj
                    qcObj.save()

                logger.errors.info("crawler AFTER SAVING SYSTEM DEFAULT CLONE %s for experiment=%s;" % (planObj.planName, expName))
                isPlanFound = True
                ret["runMode"] = planObj.runMode

            except IndexError:
                logger.errors.warn("No system default plan template found in database ")
            except:
                logger.errors.warn(traceback.format_exc())
                logger.errors.warn("Error in trying to use system default plan template for experiment=%s" % (d.get("experiment_name", '')))

    # planObj is initialized as None, which is an acceptable foreign key value
    # for Experiment.plan; however, by this point, we have either found a plan
    # or created one from the default plan template, so there should be a plan
    # and in all three cases, None, found plan, and default plan, we're ready
    # to commit the plan object to the Experiment.plan foreign key relationship.
    # NOTE(review): if cloning failed part-way (exception after the copy was
    # made), planObj may be a clone whose save did not complete - confirm
    # whether that can reach the caller.
    ret["plan"] = planObj

    # if PGM is running the old version, there is no isReverseRun in explog.txt. Check the plan if used
    if not isReverseRun:
        if isPlanFound:
            if planObj.isReverseRun:
                isReverseRun = "Yes"
            else:
                isReverseRun = "No"

    if isReverseRun == "Yes":
        ret['isReverseRun'] = True
        ret['reverselibrarykey'] = ''
        ret["runMode"] = "pe"

        logger.errors.warn("PAIRED-END is NO LONGER SUPPORTED. Skipping experiment %s" % ret['expName'])
        return ret, False
    else:
        ret['isReverseRun'] = False
        ret['libraryKey'] = ''

        if isPlanFound is False:
            ret["runMode"] = "single"

        # Optional lookup: a missing or ambiguous paired-end forward adapter
        # is only logged, never fatal.
        defaultPairedEndForward3primeAdapter = None
        try:
            # Note: In v3.0, plan has concept of "runMode".
            # TS-4524: allow crawler to be more tolerant, especially PE 3' adapter is only used for PE run
            defaultPairedEndForward3primeAdapter = models.ThreePrimeadapter.objects.get(direction="Forward", name__iexact="Ion Paired End Fwd")

        except models.ThreePrimeadapter.DoesNotExist:
            logger.errors.warn("No default pairedEnd forward 3' adapter in database for experiment %s" % ret['expName'])
        except models.ThreePrimeadapter.MultipleObjectsReturned:
            logger.errors.warn("Multiple default pairedEnd forward 3' adapters found in database for experiment %s" % ret['expName'])

        # Mandatory lookups: failure of either default below aborts with
        # (ret, False) via the except clauses at the end of this try.
        try:
            # NOTE: In v2.2, there is no way to tell if a run (aka an experiment) is part of a paired-end forward run or not
            defaultForwardLibraryKey = models.LibraryKey.objects.get(direction='Forward', isDefault=True)
            defaultForward3primeAdapter = models.ThreePrimeadapter.objects.get(direction='Forward', isDefault=True)

            validatedPgmLibraryKey = None
            dbPgmLibraryKey = None
            pgmLibraryKey = d.get("librarykeysequence", '')
            #logger.errors.debug("...pgmLibraryKey is %s " % pgmLibraryKey)

            # hasPassed: the library key sent by the PGM matches a known
            # forward-direction key in the database.
            hasPassed = False
            if pgmLibraryKey is None or len(pgmLibraryKey) == 0:
                #logger.errors.debug("...pgmLibraryKey not specified. ")
                hasPassed = False
            else:
                dbPgmLibraryKeys = models.LibraryKey.objects.filter(sequence=pgmLibraryKey)

                if dbPgmLibraryKeys:
                    for dbKey in dbPgmLibraryKeys:
                        if dbKey.direction == "Forward":
                            #logger.errors.debug("...pgmLibraryKey %s has been validated for forward run" % pgmLibraryKey)
                            validatedPgmLibraryKey = dbKey
                            hasPassed = True
                            break
                else:
                    hasPassed = False

            # set default in case plan is not used or not found in db
            if hasPassed:
                #logger.errors.debug("...Default for forward run. Use PGM library key=%s " % validatedPgmLibraryKey.sequence)

                ret['libraryKey'] = validatedPgmLibraryKey.sequence
            else:
                #logger.errors.debug("...Default for forward run. Use default library key=%s " % defaultForwardLibraryKey.sequence)

                ret['libraryKey'] = defaultForwardLibraryKey.sequence

            ret['forward3primeadapter'] = defaultForward3primeAdapter.sequence

            if isPlanFound:
                #logger.errors.debug("...FORWARD plan is FOUND for planShortId=%s " % planShortId)

                # The plan's own library key wins; otherwise validated PGM key,
                # otherwise the database default.
                if planObj.libraryKey:
                    #logger.errors.debug("...Plan used for forward run. Use plan library key=%s " % planObj.libraryKey)

                    ret['libraryKey'] = planObj.libraryKey
                else:
                    if hasPassed:
                        #logger.errors.debug("...Plan used for forward run. Use PGM library key=%s " % validatedPgmLibraryKey.sequence)

                        ret['libraryKey'] = validatedPgmLibraryKey.sequence
                    else:
                        #logger.errors.debug("...Plan used for forward run. Use default library key=%s " % defaultForwardLibraryKey.sequence)

                        ret['libraryKey'] = defaultForwardLibraryKey.sequence

                # Same precedence for the forward 3' adapter, with a separate
                # default for plans whose runMode is "pe".
                if planObj.forward3primeadapter:
                    ret['forward3primeadapter'] = planObj.forward3primeadapter
                else:
                    if (planObj.runMode == "pe"):
                        if defaultPairedEndForward3primeAdapter:
                            ret['forward3primeadapter'] = defaultPairedEndForward3primeAdapter.sequence
                        else:
                            ret['forward3primeadapter'] = ""
                    else:
                        ret['forward3primeadapter'] = defaultForward3primeAdapter.sequence
            else:
                if hasPassed:
                    #logger.errors.debug("...Plan used but not on db for forward run. Use PGM library key=%s " % validatedPgmLibraryKey.sequence)

                    ret['libraryKey'] = validatedPgmLibraryKey.sequence
                else:
                    #logger.errors.debug("...Plan used but not on db for forward run. Use default library key=%s " % defaultForwardLibraryKey.sequence)

                    ret['libraryKey'] = defaultForwardLibraryKey.sequence

                ret['forward3primeadapter'] = defaultForward3primeAdapter.sequence

            # Last resort (rule 7): fall back to whatever the PGM sent,
            # unvalidated.
            if ret['libraryKey'] is None or ret['libraryKey'] == "":
                #logger.errors.debug("...A library key cannot be determined for this FORWARD run  Use PGM default. ")
                ret['libraryKey'] = d.get("librarykeysequence", '')

        except models.LibraryKey.DoesNotExist:
            logger.errors.warn("No default forward library key in database for experiment %s" % ret['expName'])
            return ret, False
        except models.LibraryKey.MultipleObjectsReturned:
            logger.errors.warn("Multiple default forward library keys found in database for experiment %s" % ret['expName'])
            return ret, False
        except models.ThreePrimeadapter.DoesNotExist:
            logger.errors.warn("No default forward 3' adapter in database for experiment %s" % ret['expName'])
            return ret, False
        except models.ThreePrimeadapter.MultipleObjectsReturned:
            logger.errors.warn("Multiple default forward 3' adapters found in database for experiment %s" % ret['expName'])
            return ret, False
        except:
            # Catch-all: anything else raised above is logged with its
            # traceback and reported to the caller as a failure.
            logger.errors.warn("Experiment %s" % ret['expName'])
            logger.errors.warn(traceback.format_exc())
            return ret, False

    # Limit input sizes to defined field widths in models.py
    ret['notes'] = ret['notes'][:1024]
    ret['expDir'] = ret['expDir'][:512]
    ret['expName'] = ret['expName'][:128]
    ret['pgmName'] = ret['pgmName'][:64]
    ret['unique'] = ret['unique'][:512]
    ret['storage_options'] = ret['storage_options'][:200]
 #   ret['project'] = ret['project'][:64]
    ret['sample'] = ret['sample'][:64]
    ret['library'] = ret['library'][:64]
    ret['chipBarcode'] = ret['chipBarcode'][:64]
    ret['seqKitBarcode'] = ret['seqKitBarcode'][:64]
    ret['chipType'] = ret['chipType'][:32]
    ret['flowsInOrder'] = ret['flowsInOrder'][:512]
    ret['libraryKey'] = ret['libraryKey'][:64]
    ret['barcodeId'] = ret['barcodeId'][:128]
    ret['reverse_primer'] = ret['reverse_primer'][:128]
    ret['reverselibrarykey'] = ret['reverselibrarykey'][:64]
    ret['reverse3primeadapter'] = ret['reverse3primeadapter'][:512]
    ret['forward3primeadapter'] = ret['forward3primeadapter'][:512]
    # NOTE(review): sequencekitbarcode and librarykitbarcode are truncated
    # twice below; the repeats are harmless but redundant.
    ret['sequencekitbarcode'] = ret['sequencekitbarcode'][:512]
    ret['librarykitbarcode'] = ret['librarykitbarcode'][:512]
    ret['sequencekitname'] = ret['sequencekitname'][:512]
    ret['sequencekitbarcode'] = ret['sequencekitbarcode'][:512]
    ret['librarykitname'] = ret['librarykitname'][:512]
    ret['librarykitbarcode'] = ret['librarykitbarcode'][:512]
    ret['runMode'] = ret['runMode'][:64]

    logger.errors.debug("For experiment %s" % ret['expName'])
    logger.errors.debug("...Ready to save run: isReverseRun=%s;" % ret['isReverseRun'])
    logger.errors.debug("...Ready to save run: libraryKey=%s;" % ret['libraryKey'])
    logger.errors.debug("...Ready to save run: forward3primeadapter=%s;" % ret['forward3primeadapter'])
    logger.errors.debug("...Ready to save run: reverselibrarykey=%s;" % ret['reverselibrarykey'])
    logger.errors.debug("...Ready to save run: reverse3primeadapter=%s;" % ret['reverse3primeadapter'])

    return ret, True
Beispiel #2
0
def exp_kwargs(d, folder, logobj):
    """Build the keyword arguments for an ``Experiment`` database object.

    :param d: dictionary produced by `parse_log` (explog key/value pairs).
    :param folder: experiment folder; stored as ``expDir`` and ``unique``.
    :param logobj: logger used for the warning and debug messages.
    :returns: dict of ``Experiment`` field values, with the string fields
        cut down to the widths declared in models.py.
    """
    # A reported chip version takes precedence over the chip type; two
    # version prefixes are then translated to their chip names.
    chip = d.get('chipversion', '') or d.get('chiptype', '')
    if chip.startswith('1.10'):
        chip = 'P1.1.17'
    elif chip.startswith('1.20'):
        chip = 'P1.2.18'

    # (explog key, Experiment field) pairs copied with a rename.
    renamed = (
        ("experiment_name", "expName"),
        ("chipbarcode", "chipBarcode"),
        ("user_notes", "notes"),
        ("autoanalyze", "autoAnalyze"),
        ("prebeadfind", "usePreBeadfind"),
        ("librarykeysequence", "libraryKey"),
        ("barcodeid", "barcodeKitName"),
        ("isReverseRun", "isReverseRun"),
        ("library", "reference"),
    )

    # (Experiment field, computed value) pairs; the values are computed here,
    # in this order, before the result dict is filled.
    computed = (
        ("chipType", chip),
        ("pgmName", d.get('devicename', extract_rig(folder))),
        ("log", json.dumps(d, indent=4)),
        ("expDir", folder),
        ("unique", folder),
        ("baselineRun", d.get("runtype") == "STD" or d.get("runtype") == "Standard"),
        ("date", explog_time(d.get("start_time", ""), folder)),
        ("storage_options", models.GlobalConfig.objects.all()[0].default_storage_options),
        ("flowsInOrder", getFlowOrder(d.get("image_map", ""))),
        ("reverse_primer", d.get('reverse_primer', 'Ion Kit')),
    )

    ret = {}
    # Fields that keep their explog name.
    for same_name in ("sample", "cycles", "flows", "project"):
        ret[same_name] = d.get(same_name, '')
    for log_key, field in renamed:
        ret[field] = d.get(log_key, '')
    ret.update(computed)
    # Kit fields start out blank and are filled in below.
    for blank_field in ("sequencekitname", "seqKitBarcode", "sequencekitbarcode",
                        "libraryKitName", "libraryKitBarcode"):
        ret[blank_field] = ''

    # N.B. this field is not used
    ret['storageHost'] = 'localhost'

    if ret['flows'] == "":
        # Flows is not defined in explog.txt (very-old-dataset support):
        # derive it from the cycle count.
        ret['flows'] = len(ret['flowsInOrder']) * int(ret['cycles'])
        logobj.warn("Flows keyword missing: Calculated Flows is %d" % int(ret['flows']))
    else:
        # Flows is defined: cycles is derived from it rather than taken from
        # the log file (use of cycles is deprecated anyway).
        ret['cycles'] = int(int(ret['flows']) / len(ret['flowsInOrder']))

    # The literal string "none" (any case) is stored as blank.
    if ret['barcodeKitName'].lower() == 'none':
        ret['barcodeKitName'] = ''

    blocks = d.get('blocks', [])
    if len(blocks) > 0:
        # Block run: auto-analysis is on only if some non-thumbnail block
        # asks for it.
        ret['rawdatastyle'] = 'tiled'
        ret['autoAnalyze'] = False
        for block in blocks:
            cells = block.split(',')
            # Hack alert: only the thumbnail entry has 0,0 as the first and
            # second element of the string (see explogparser.parse_log).
            if '0' in cells[0] and '0' in cells[1]:
                continue
            if auto_analyze_block(block, logobj):
                ret['autoAnalyze'] = True
                logobj.debug("Block Run. Detected at least one block to auto-run analysis")
                break
        if ret['autoAnalyze'] is False:
            logobj.debug("Block Run. auto-run whole chip has not been specified")
    else:
        ret['rawdatastyle'] = 'single'

    # Kit information: a blank or "NOT_SCANNED" explog value leaves the blank
    # default in place. The sequencing kit barcode feeds two fields because
    # rundb_experiment has two attributes for it.
    kit_sources = (
        ("seqkitname", ("sequencekitname",)),
        ("seqkitpart", ("seqKitBarcode", "sequencekitbarcode")),
        ("libbarcode", ("libraryKitBarcode",)),
        ("libkit", ("libraryKitName",)),
    )
    for log_key, targets in kit_sources:
        kit_value = d.get(log_key, '')
        if kit_value and kit_value != "NOT_SCANNED":
            for target in targets:
                ret[target] = kit_value

    # Older PGM versions do not write isReverseRun to explog.txt; anything
    # other than "Yes" counts as a forward run.
    ret['isReverseRun'] = d.get("isreverserun", '') == "Yes"

    # The instrument may send a blank runType or omit it altogether.
    ret['runType'] = d.get('runtype', "") or "GENS"

    # Limit input sizes to defined field widths in models.py
    field_widths = (
        ('notes', 1024),
        ('expDir', 512),
        ('expName', 128),
        ('pgmName', 64),
        ('unique', 512),
        ('storage_options', 200),
        ('project', 64),
        ('sample', 64),
        ('reference', 64),
        ('chipBarcode', 64),
        ('seqKitBarcode', 64),
        ('chipType', 32),
        ('flowsInOrder', 512),
        ('libraryKey', 64),
        ('barcodeKitName', 128),
        ('reverse_primer', 128),
        ('sequencekitname', 512),
        ('sequencekitbarcode', 512),
        ('libraryKitName', 512),
        ('libraryKitBarcode', 512),
        ('runType', 512),
    )
    for field, width in field_widths:
        ret[field] = ret[field][:width]

    return ret
Beispiel #3
0
def exp_kwargs(d, folder):
    """Translate a parsed explog into ``Experiment`` keyword arguments.

    :param d: dictionary produced by `parse_log` (explog key/value pairs).
    :param folder: experiment folder; stored as ``expDir`` and ``unique``.
    :returns: dict of ``Experiment`` field values, with the string fields
        cut down to the widths declared in models.py.
    """

    def scanned(log_key):
        """Return the explog value for ``log_key``, or '' if blank or not scanned."""
        value = d.get(log_key, '')
        if value and value != "NOT_SCANNED":
            return value
        return ''

    # Computed values, evaluated up front and in a fixed order.
    pgm_name = d.get('devicename', extract_rig(folder))
    log_dump = json.dumps(d, indent=4)
    is_baseline = d.get("runtype") == "STD" or d.get("runtype") == "Standard"
    run_date = explog_time(d.get("start_time", ""), folder)
    storage_options = models.GlobalConfig.objects.all()[0].default_storage_options
    flow_order = getFlowOrder(d.get("image_map", ""))
    reverse_primer = d.get('reverse_primer', 'Ion Kit')

    ret = {}

    # Fields that keep their explog name.
    for name in ("sample", "cycles", "flows", "project"):
        ret[name] = d.get(name, '')

    # Fields copied under a different name: (explog key, Experiment field).
    for log_key, field in (
            ("experiment_name", "expName"),
            ("chiptype", "chipType"),
            ("chipbarcode", "chipBarcode"),
            ("user_notes", "notes"),
            ("autoanalyze", "autoAnalyze"),
            ("prebeadfind", "usePreBeadfind"),
            ("librarykeysequence", "libraryKey"),
            ("barcodeid", "barcodeKitName"),
            ("isReverseRun", "isReverseRun"),
            ("library", "reference"),
    ):
        ret[field] = d.get(log_key, '')

    ret["pgmName"] = pgm_name
    ret["log"] = log_dump
    ret["expDir"] = folder
    ret["unique"] = folder
    ret["baselineRun"] = is_baseline
    ret["date"] = run_date
    ret["storage_options"] = storage_options
    ret["flowsInOrder"] = flow_order
    ret["reverse_primer"] = reverse_primer

    # Kit fields start out blank and are filled in below.
    for field in ("sequencekitname", "seqKitBarcode", "sequencekitbarcode",
                  "libraryKitName", "libraryKitBarcode"):
        ret[field] = ''

    # N.B. this field is not used
    ret['storageHost'] = 'localhost'

    if ret['flows'] == "":
        # Flows is not defined in explog.txt (very-old-dataset support):
        # derive it from the cycle count.
        ret['flows'] = len(ret['flowsInOrder']) * int(ret['cycles'])
        logger.errors.warn("Flows keyword missing: Calculated Flows is %d" %
                           int(ret['flows']))
    else:
        # Flows is defined: cycles is derived from it rather than taken from
        # the log file (use of cycles is deprecated anyway).
        ret['cycles'] = int(int(ret['flows']) / len(ret['flowsInOrder']))

    # The literal string "none" (any case) is stored as blank.
    if ret['barcodeKitName'].lower() == 'none':
        ret['barcodeKitName'] = ''

    blocks = d.get('blocks', [])
    if len(blocks) > 0:
        # Block run: auto-analysis is on only if some non-thumbnail block
        # asks for it.
        ret['rawdatastyle'] = 'tiled'
        ret['autoAnalyze'] = False
        for block in blocks:
            cells = block.split(',')
            # Hack alert: only the thumbnail entry has 0,0 as the first and
            # second element of the string (see explogparser.parse_log).
            is_thumbnail = '0' in cells[0] and '0' in cells[1]
            if not is_thumbnail and auto_analyze_block(block):
                ret['autoAnalyze'] = True
                logger.errors.debug(
                    "Block Run. Detected at least one block to auto-run analysis")
                break
        else:
            # Loop ended without a break: no block requested auto-analysis.
            logger.errors.debug(
                "Block Run. auto-run whole chip has not been specified")
    else:
        ret['rawdatastyle'] = 'single'

    # Do not replace the blank default if the explog value is blank or
    # "NOT_SCANNED".
    seq_kit_name = scanned("seqkitname")
    if seq_kit_name:
        ret['sequencekitname'] = seq_kit_name

    # In rundb_experiment there are two attributes for the sequencing kit
    # barcode, so both are set.
    seq_kit_barcode = scanned("seqkitpart")
    if seq_kit_barcode:
        ret['seqKitBarcode'] = seq_kit_barcode
        ret['sequencekitbarcode'] = seq_kit_barcode

    lib_kit_barcode = scanned("libbarcode")
    if lib_kit_barcode:
        ret['libraryKitBarcode'] = lib_kit_barcode

    lib_kit_name = scanned('libkit')
    if lib_kit_name:
        ret['libraryKitName'] = lib_kit_name

    # Older PGM versions do not write isReverseRun to explog.txt; anything
    # other than "Yes" counts as a forward run.
    ret['isReverseRun'] = d.get("isreverserun", '') == "Yes"

    # The instrument may send a blank runType or omit it altogether.
    ret['runType'] = d.get('runtype', "") or "GENS"

    # Limit input sizes to defined field widths in models.py
    field_widths = [
        ('notes', 1024),
        ('expDir', 512),
        ('expName', 128),
        ('pgmName', 64),
        ('unique', 512),
        ('storage_options', 200),
        ('project', 64),
        ('sample', 64),
        ('reference', 64),
        ('chipBarcode', 64),
        ('seqKitBarcode', 64),
        ('chipType', 32),
        ('flowsInOrder', 512),
        ('libraryKey', 64),
        ('barcodeKitName', 128),
        ('reverse_primer', 128),
        ('sequencekitname', 512),
        ('sequencekitbarcode', 512),
        ('libraryKitName', 512),
        ('libraryKitBarcode', 512),
        ('runType', 512),
    ]
    for field, width in field_widths:
        ret[field] = ret[field][:width]

    return ret