def copyRawRiles(ct, dest=None):
    """Copy each sample's Bcl2fastq output directory under ``dest``.

    The samples in ``ct.dct['Sample']`` are grouped by their distinct
    (fcid, flowcell_lane) pairs.  For every pair a 'Bcl2fastq' CalcTuple is
    built, and the output directory of each sample belonging to that lane
    is copied with ``shutil.copytree`` to ``dest``, keeping its path
    relative to ``<config.GUP_HOME>/RUNS``.

    Args:
        ct: calc tuple exposing ``dct`` (must hold 'Sample') and ``db``.
        dest: destination root.  When falsy, a default below
            /isiseqruns/GUP_Deliveries is derived from the submission
            number and the project name of the first sample.

    Returns:
        None.  Directories that cannot be copied are reported on stdout
        and skipped.
    """
    sampList = ct.dct['Sample']
    if not sampList:
        # Nothing to copy; also avoids indexing an empty list below.
        return
    getAttr = ct.db.getAttrFromSamp
    if not dest:
        proj = getAttr('project_name', sampList[0])
        # NOTE(review): the original used an undefined name `subNum` here
        # (NameError).  Assumes ct is a Collate tuple whose dct carries
        # 'subNum' -- confirm against callers.
        subNum = ct.dct['subNum']
        dest = '/isiseqruns/GUP_Deliveries/Sub_0{}_RawFQ_{}'.format(
            subNum, proj)
    fcid_ln = set(
        (getAttr('fcid', s), getAttr('flowcell_lane', s))
        for s in sampList)
    for fcid, ln in fcid_ln:
        bcl_ct = calc_tuple.CalcTuple(
            db=ct.db, node='Bcl2fastq', fcid=fcid, laneNum=ln)
        # Query the lane's samples once instead of once per sample.
        laneSamps = set(ct.db.getAllSamples(**bcl_ct.dct))
        for samp in sampList:
            if samp not in laneSamps:
                continue
            odir = bcl_ct.getSampOutdir(samp)
            intermedDir = os.path.relpath(
                odir, os.path.join(config.GUP_HOME, 'RUNS'))
            toDir = os.path.join(dest, intermedDir)
            # copytree creates the leaf directory itself; only the parent
            # must exist beforehand.
            subprocess.call(
                ['mkdir', '-p', '-m', '777', os.path.split(toDir)[0]])
            try:
                shutil.copytree(odir, toDir)
            except (OSError, shutil.Error):
                # Best effort: typically the destination already exists.
                print("{} already there?".format(odir))
 def buildCalcTupleForArbitraryPooling(self, **kwargs):
     """Build one 'Collate' CalcTuple for a caller-supplied pool dict.

     The required ``pool`` keyword is removed from ``kwargs`` and turned
     into a submission number via ``getSubNumFromPoolDict``; a truthy
     ``subNumPrefix`` keyword is prepended to it with an underscore.  The
     new tuple is appended to ``self.calcTuples``.  Unless
     ``tryToLoadFinishedCalc`` reports success, the calc info is built for
     the computed output directory and the directory recorded in the
     tuple's metadata is created.

     Raises:
         KeyError: if ``pool`` is not given.
     """
     poolDct = kwargs.pop('pool')
     subNum = getSubNumFromPoolDict(poolDct)
     prefix = kwargs.get('subNumPrefix')
     if prefix:
         subNum = prefix + '_' + subNum

     ct = calc_tuple.CalcTuple(
         db=self.db, node='Collate', fcid=self.fcid, subNum=subNum,
         poolId=sorted(poolDct.keys()), pool=poolDct, **kwargs)

     # TODO a) make this hack a method of collate ct, or b) singleton pools
     # get encased in list
     repSamp = ct.dct['pool'].values()[0]
     if isinstance(repSamp, list):
         repSamp = repSamp[0]

     self.calcTuples.append(ct)

     # Directory name: Sub_<subNum>_<refGenome>_<project>__<hash prefix>
     dirPref = self.dirPref
     refGenome = ct.getRefGenome()
     project = self.db.getAttrFromSamp('project_name', repSamp)
     hshPrefix = ct.hsh[:config.ODIR_HSH_LEN]
     outdir = p_join(
         dirPref,
         "Sub_{0}_{1}_{2}__{3}".format(
             subNum, refGenome, project, hshPrefix))

     if not self.tryToLoadFinishedCalc(ct):
         self.buildCalcInfoWithErrAndWrn(ct, outdir)
         ct.putMetadata(Sample=None)
         mkdirCmd = ['mkdir', '-p', '-m', '777', ct.getMetadata('outdir')]
         subprocess.call(mkdirCmd)
 def getUpstream(self, node=None):
     """Return the 'Bcl2fastq' CalcTuple upstream of this tuple's sample.

     Similar to FilterCalcTuple but no ReadRescue.  A copy of ``self.dct``
     has its 'Sample' entry replaced by that sample's 'fcid' and
     'laneNum' (the db attribute 'flowcell_lane').  ``node`` is accepted
     but not used.
     """
     params = self.dct.copy()
     sample = params.pop('Sample')
     lookup = self.db.getAttrFromSamp
     params['fcid'] = lookup('fcid', sample)
     params['laneNum'] = lookup('flowcell_lane', sample)
     return calc_tuple.CalcTuple(db=self.db, node='Bcl2fastq', **params)
 def buildCalcTupleForSampList(self, **kwargs):
     """Build one 'Collate' CalcTuple from the given keyword arguments.

     A truthy ``subNumPrefix`` keyword is required.  The tuple is appended
     to ``self.calcTuples``.  Unless ``tryToLoadFinishedCalc`` reports
     success, the calc info is built for the computed output directory,
     the tuple's own sample list is stored in its metadata, and the
     directory recorded in the metadata is created.

     Raises:
         KeyError: if ``subNumPrefix`` is missing.
         AssertionError: if ``subNumPrefix`` is falsy.
     """
     assert kwargs['subNumPrefix'], "need it for sampList collate."
     ct = calc_tuple.CalcTuple(
         db=self.db, node='Collate', fcid=self.fcid, **kwargs)
     self.calcTuples.append(ct)

     if not self.tryToLoadFinishedCalc(ct):
         # Directory name: Sub_<subNum>_<refGenome>_<hash prefix>
         dirPref = self.dirPref
         folder = "Sub_{0}_{1}_{2}".format(
             ct.dct['subNum'],
             ct.getRefGenome(),
             ct.hsh[:config.ODIR_HSH_LEN])
         outdir = p_join(dirPref, folder)
         self.buildCalcInfoWithErrAndWrn(ct, outdir)
         ct.putMetadata(Sample=ct.dct['Sample'])
         mkdirCmd = ['mkdir', '-p', '-m', '777', ct.getMetadata('outdir')]
         subprocess.call(mkdirCmd)
    def buildCalcTuples(self, **kwargs):
        """Populate ``self.calcTuples`` with 'Collate' CalcTuples.

        We get the submission numbers, and build a dictionary mapping
        each subNum to its sample list. Finally we check if there are
        multiple species, making a calcTuple for each unique genome.

        Planning on three use cases at the moment:
        1) FCID - group all samples in this flowcell by their submission number
        2) By SubNum - Ideally this would span multiple fcids, now just a
                filter. TODO JWS
        3) Custom Pool - This generates its own specific hexadecimal subNum

        There are also three cases for algorithms, commented as Case_I, ...
        1) Sample - Each alignment entity is a single sample.
        2) DefaultPooling - Each alignment entity is a pool based on sample Ids
                I believe all samples in pool have same sample_name in db
        3) ArbPooling - Each alignment entity is an arbitrary pooling. For
                sample_name we will again use poolId. This is confusing,
                because throughout this code Sample often means 'alignment
                entity'

        Keyword arguments inspected here (everything else is forwarded):
            pool: a dict delegates to buildCalcTupleForArbitraryPooling;
                True enables default pooling via getPoolFromSubSamps;
                False is the same as omitting it.
            sampList: when given without ``pool``, delegates to
                buildCalcTupleForSampList.
            refGenome: only allowed together with ``subNum`` or
                ``sampList``.
            subNum: forwarded to getSubNumToSampsDict, then replaced by
                each submission number found.

        Raises:
            AssertionError: on an unsupported ``pool`` value, or when
                ``refGenome`` is given without ``subNum``/``sampList``.
        """

        # TODO Refactor this, by getting ct's first.
        # A little tricky to test thoroughly:
        # is always used, even for pools of 1 with the same name as the sample.
        # refGenome (matching all, none, or some of default values)
        # Sample Sets: SubNum 1 Fcid, SubNum multFC, custom sample set,
        #    default pool, custom pool
        # Homogeneous/Heterogeneous submissions
        # 30 cases
        self.calcTuples = []
        doPooling = False
        if 'pool' in kwargs:
            if isinstance(kwargs['pool'], dict):
                self.buildCalcTupleForArbitraryPooling(**kwargs)  # Case III
                return
            elif kwargs['pool'] is False:
                kwargs.pop('pool')
            else:
                doPooling = True
                assert kwargs['pool'] is True, \
                    "allowed vals for pool: True (False)  or dct"
        elif 'sampList' in kwargs:
            self.buildCalcTupleForSampList(**kwargs)
            return
        if 'refGenome' in kwargs:
            assert 'subNum' in kwargs or 'sampList' in kwargs, \
                "must specify single subNum with refG"
        # TODO fcidOnly or MultFC? two descriptions of same thing
        # Disabled logic, kept for reference:
        #     if not ('multFC' in kwargs and kwargs['multFC']):
        #         kwargs['fcid'] = self.fcid
        #         kwargs['multFC'] = False

        subNumToSamps = self.getSubNumToSampsDict(**kwargs)

        # 'pool' and 'subNum' are passed explicitly to CalcTuple below.
        # Leaving either in kwargs makes that call raise TypeError
        # (duplicate keyword); the original forgot 'pool' when pool=True.
        kwargs.pop('pool', None)
        kwargs.pop('subNum', None)

        # TODO this shouldn't be here. find When samples are first imported
        # from stemcell and lower them there.
        sampTable = self.db.tables['Samp']
        for samp in sampTable:
            sampTable[samp]['genome'] = sampTable[samp]['genome'].lower()

        getAttr = self.db.getAttrFromSamp
        for subNum in subNumToSamps:
            samps = subNumToSamps[subNum]
            # Numeric submission numbers are zero-padded to four digits;
            # anything else is used verbatim.
            try:
                subNumStr = "{0:04d}".format(int(subNum))
            except (TypeError, ValueError):
                subNumStr = subNum

            # One calcTuple per distinct genome within the submission.
            for genome in set(getAttr('genome', el) for el in samps):
                subSampList = sorted(
                    el for el in samps if getAttr('genome', el) == genome)
                prj = getAttr('project_name', subSampList[0])
                pool = {}
                if doPooling:
                    pool = self.getPoolFromSubSamps(subSampList)

                ct = calc_tuple.CalcTuple(
                    db=self.db,
                    node='Collate',
                    subNum=subNum,
                    Sample=subSampList,
                    pool=pool,
                    poolId=sorted(pool.keys()),
                    fcid=self.fcid,
                    **kwargs)

                self.calcTuples.append(ct)

                _outdir = p_join(self.dirPref,
                                 "Sub_{0}_{1}_{2}__{3}".format(
                                     subNumStr,
                                     prj,
                                     ct.getRefGenome(),
                                     ct.hsh[:config.ODIR_HSH_LEN]))
                if self.tryToLoadFinishedCalc(ct):
                    continue
                self.buildCalcInfoWithErrAndWrn(ct, _outdir)
                ct.putMetadata(Sample=subSampList)
                subprocess.call(['mkdir', '-p', '-m', '777',
                                 ct.getMetadata('outdir')])