def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """
    Compose a GTrack file from origFn, complemented with columns looked up in dbFn.

    Parameters:
        origFn:             file name passed to GtrackGenomeElementSource as the
                            original source.
        dbFn:               file name passed to GtrackGenomeElementSource as the
                            lookup ("db") source.
        intersectingFactor: 'id' or 'position'; selects whether the lookup
                            dict is an IdFullInfoDict or a TupleFullInfoDict.
        gtrackColsToAdd:    container of column names handed to
                            ElementComplementer. If it contains 'value' and/or
                            'edges', the matching type/dimension headers of the
                            db source are forced onto the composed output.
        genome:             genome identifier forwarded to the sources and to
                            expandHeadersOfGtrackFileAndReturnComposer.

    Returns:
        Whatever expandHeadersOfGtrackFileAndReturnComposer returns for the
        composed file contents.

    Raises:
        ShouldNotOccurError: if intersectingFactor is neither 'id' nor
                             'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)

    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # Must actually raise: a bare reference to the exception class is a
        # no-op and would leave fullDbDict unbound for the code below.
        raise ShouldNotOccurError()

    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    # Columns taken from the db source must keep the db source's type headers.
    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']

    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    composedFile = composerCls(ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd),
                               forcedHeaderDict=forcedHeaderDict).returnComposed()

    # The composed contents are passed as a string; the file name argument is unused ('').
    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
Example #2
0
def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """
    Compose a GTrack file from origFn, complemented with columns looked up in dbFn.

    Parameters:
        origFn:             file name passed to GtrackGenomeElementSource as the
                            original source.
        dbFn:               file name passed to GtrackGenomeElementSource as the
                            lookup ("db") source.
        intersectingFactor: 'id' or 'position'; selects whether the lookup
                            dict is an IdFullInfoDict or a TupleFullInfoDict.
        gtrackColsToAdd:    container of column names handed to
                            ElementComplementer. If it contains 'value' and/or
                            'edges', the matching type/dimension headers of the
                            db source are forced onto the composed output.
        genome:             genome identifier forwarded to the sources and to
                            expandHeadersOfGtrackFileAndReturnComposer.

    Returns:
        Whatever expandHeadersOfGtrackFileAndReturnComposer returns for the
        composed file contents.

    Raises:
        ShouldNotOccurError: if intersectingFactor is neither 'id' nor
                             'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)

    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # Must actually raise: a bare reference to the exception class is a
        # no-op and would leave fullDbDict unbound for the code below.
        raise ShouldNotOccurError()

    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    # Columns taken from the db source must keep the db source's type headers.
    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']

    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    composedFile = composerCls(ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd),
                               forcedHeaderDict=forcedHeaderDict).returnComposed()

    # The composed contents are passed as a string; the file name argument is unused ('').
    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
Example #3
0
    def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
        if 'subtype url' in self._forcedHeaderDict:
            subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
                if self._forcedHeaderDict['subtype url'] != '' else []
        else:
            subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST

        for subtypeUrl in subtypeUrlList:
            subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
            subtypeColumns = subtypeGESource.getColumns(orig=False)
            subtypeHeaders = subtypeGESource.getHeaderDict()

            numRepeats = 2 if subtypeHeaders[
                'subtype adherence'] == 'redefinable' else 1

            for repeat in range(numRepeats):
                self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
                self._setHeaderDict('end inclusive',
                                    subtypeHeaders['end inclusive'])

                if subtypeHeaders['subtype adherence'] in [
                        'reorderable', 'free'
                ]:
                    rearrangedColumns = columns
                    rearrangedHbColumns = hbColumns
                else:
                    colSet = set(columns)
                    subtypeColSet = set(subtypeColumns)

                    if subtypeHeaders['subtype adherence'] == 'redefinable':
                        colsRemoved = list(subtypeColSet - colSet)
                        colsAdded = list(colSet - subtypeColSet)
                        if len(colsRemoved) != len(colsAdded) or len(
                                colsRemoved) > 2:
                            continue

                        colsRedefinedTo = [
                            'value', 'edges'
                        ] if repeat == 1 else ['edges', 'value']

                        rearrangedColumns = []
                        i, j = (0, 0)
                        for col in subtypeColumns:
                            if col in colsRemoved:
                                rearrangedColumns.append(colsRedefinedTo[i])
                                i += 1
                            elif col in colsRedefinedTo:
                                rearrangedColumns.append(colsAdded[j])
                                j += 1
                            else:
                                rearrangedColumns.append(col)

                        for col in columns:
                            if col in colsAdded[j:]:
                                rearrangedColumns.append(col)
                    else:
                        rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                            [x for x in columns if x not in subtypeColSet]
                    rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(
                        rearrangedColumns)

                try:
                    tempFile = StringIO()
                    self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                          deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)

                    gtrackGESource = Gtrack('subtypeTest.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                            strToUseInsteadOfFn=tempFile.getvalue())
                    tempFile.close()

                    if gtrackGESource.compliesWithSubtype(subtypeUrl):
                        gtrackGESource._headerDict['subtype url'] = subtypeUrl
                        gtrackGESource._updateHeadersAccordingToSubtype()
                        updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                          if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                        for header in updatedHeaders:
                            self._setHeaderDict(header, updatedHeaders[header])

                        return rearrangedHbColumns, rearrangedColumns, True
                except Exception, e:
                    continue
 def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
     if 'subtype url' in self._forcedHeaderDict:
         subtypeUrlList = [self._forcedHeaderDict['subtype url']] \
             if self._forcedHeaderDict['subtype url'] != '' else []
     else:
         subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST
 
     for subtypeUrl in subtypeUrlList:
         subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
         subtypeColumns = subtypeGESource.getColumns(orig=False)
         subtypeHeaders = subtypeGESource.getHeaderDict()
         
         numRepeats = 2 if subtypeHeaders['subtype adherence'] == 'redefinable' else 1
         
         for repeat in range(numRepeats):
             self._setHeaderDict('1-indexed', subtypeHeaders['1-indexed'])
             self._setHeaderDict('end inclusive', subtypeHeaders['end inclusive'])
             
             if subtypeHeaders['subtype adherence'] in ['reorderable', 'free']:
                 rearrangedColumns = columns
                 rearrangedHbColumns = hbColumns
             else:
                 colSet = set(columns)
                 subtypeColSet = set(subtypeColumns)
                 
                 if subtypeHeaders['subtype adherence'] == 'redefinable':
                     colsRemoved = list(subtypeColSet - colSet)
                     colsAdded = list(colSet - subtypeColSet)
                     if len(colsRemoved) != len(colsAdded) or len(colsRemoved) > 2:
                         continue
                     
                     colsRedefinedTo = ['value', 'edges'] if repeat == 1 else ['edges', 'value']
                     
                     rearrangedColumns = []
                     i,j = (0,0)
                     for col in subtypeColumns:
                         if col in colsRemoved:
                             rearrangedColumns.append(colsRedefinedTo[i])
                             i += 1
                         elif col in colsRedefinedTo:
                             rearrangedColumns.append(colsAdded[j])
                             j += 1
                         else:
                             rearrangedColumns.append(col)
                             
                     for col in columns:
                         if col in colsAdded[j:]:
                             rearrangedColumns.append(col)
                 else:
                     rearrangedColumns = [x for x in subtypeColumns if x in colSet] + \
                                         [x for x in columns if x not in subtypeColSet]
                 rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(rearrangedColumns)
             
             try:
                 tempFile = StringIO()
                 self._composeContents(tempFile, rearrangedHbColumns, rearrangedColumns, \
                                       deepcopy(self._geSource), onlyNonDefault=True, singleDataLine=True)
                     
                 gtrackGESource = Gtrack('subtypeTest.' + self.getDefaultFileNameSuffix(), printWarnings=False, \
                                         strToUseInsteadOfFn=tempFile.getvalue())
                 tempFile.close()
                 
                 if gtrackGESource.compliesWithSubtype(subtypeUrl):
                     gtrackGESource._headerDict['subtype url'] = subtypeUrl
                     gtrackGESource._updateHeadersAccordingToSubtype()
                     updatedHeaders = OrderedDict([(key, val) for key,val in gtrackGESource.getHeaderDict().iteritems() \
                                       if val != Gtrack.DEFAULT_HEADER_DICT.get(key)])
                     for header in updatedHeaders:
                         self._setHeaderDict(header, updatedHeaders[header])
                     
                     return rearrangedHbColumns, rearrangedColumns, True
             except Exception, e:
                 continue