def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """Complement the elements of one GTrack file with columns from another.

    The elements read from ``origFn`` are wrapped in an ``ElementComplementer``
    together with a lookup structure built from ``dbFn``, and the result is
    composed into GTrack text which is then handed to
    ``expandHeadersOfGtrackFileAndReturnComposer``.

    Parameters:
        origFn: file name of the GTrack file whose elements are complemented.
        dbFn: file name of the GTrack file that supplies the extra columns.
        intersectingFactor: how elements of the two files are matched; either
            ``'id'`` (uses ``IdFullInfoDict``) or ``'position'`` (uses
            ``TupleFullInfoDict``).
        gtrackColsToAdd: collection of GTrack column names to add. If it
            contains ``'value'`` or ``'edges'``, the corresponding type and
            dimension headers are copied from the database file.
        genome: genome identifier passed on to the GTrack sources and to the
            header expansion.

    Returns:
        The return value of ``expandHeadersOfGtrackFileAndReturnComposer``
        called on the composed GTrack contents.

    Raises:
        ShouldNotOccurError: if ``intersectingFactor`` is neither ``'id'`` nor
            ``'position'``.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)

    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # Fail here with the intended exception; otherwise fullDbDict would be
        # unbound and the function would die later with a NameError.
        raise ShouldNotOccurError()

    # Headers describing the added columns must follow the database file,
    # since that is where the column contents come from.
    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']
    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']

    # The composer class is chosen from the original file only.
    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    composedFile = composerCls(
        ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd),
        forcedHeaderDict=forcedHeaderDict
    ).returnComposed()

    # The composed contents are passed as a string, hence the empty file name.
    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
# Example #2
# 0
    def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
        """Search for a GTrack subtype that the composed file complies with.

        Candidate subtypes are tried in order. For each one, the columns are
        rearranged according to the subtype's "subtype adherence" header, a
        test file is composed in memory and parsed again, and the parsed file
        is checked against the subtype.

        Parameters:
            hbColumns: column names paired with ``columns``; used unchanged for
                "reorderable"/"free" subtypes and otherwise recomputed with
                ``self._getHbColumnsFromGtrackColumns``.
                NOTE(review): presumably the internal column names that
                correspond to the GTrack columns - confirm against caller.
            columns: list of GTrack column names of the file being composed.

        Returns:
            ``(rearrangedHbColumns, rearrangedColumns, True)`` for the first
            subtype the test file complies with.
            NOTE(review): no return statement for the "no subtype matched"
            case is visible in this excerpt - confirm against the full source.

        Side effects:
            The "1-indexed" and "end inclusive" headers are set on ``self`` for
            every subtype attempted, including ones that end up not matching.
            On a match, every header of the parsed test file that differs from
            ``Gtrack.DEFAULT_HEADER_DICT`` is copied to ``self`` as well.
        """
        # A forced "subtype url" header restricts the search to that single
        # subtype; a forced empty string means no subtype is tried at all.
        if "subtype url" in self._forcedHeaderDict:
            subtypeUrlList = (
                [self._forcedHeaderDict["subtype url"]] if self._forcedHeaderDict["subtype url"] != "" else []
            )
        else:
            subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST

        for subtypeUrl in subtypeUrlList:
            subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
            subtypeColumns = subtypeGESource.getColumns(orig=False)
            subtypeHeaders = subtypeGESource.getHeaderDict()

            # "redefinable" subtypes get two attempts, one for each ordering
            # of colsRedefinedTo below.
            numRepeats = 2 if subtypeHeaders["subtype adherence"] == "redefinable" else 1

            for repeat in range(numRepeats):
                # Set before composing the test file; these are not reverted
                # if the subtype turns out not to match.
                self._setHeaderDict("1-indexed", subtypeHeaders["1-indexed"])
                self._setHeaderDict("end inclusive", subtypeHeaders["end inclusive"])

                if subtypeHeaders["subtype adherence"] in ["reorderable", "free"]:
                    # Column order is kept as given for these adherence levels.
                    rearrangedColumns = columns
                    rearrangedHbColumns = hbColumns
                else:
                    colSet = set(columns)
                    subtypeColSet = set(subtypeColumns)

                    if subtypeHeaders["subtype adherence"] == "redefinable":
                        # colsRemoved: subtype columns missing from the file.
                        # colsAdded: file columns unknown to the subtype.
                        # Both lists come from set differences, so their order
                        # is the set iteration order, not the column order.
                        colsRemoved = list(subtypeColSet - colSet)
                        colsAdded = list(colSet - subtypeColSet)
                        # Only a one-to-one replacement of at most two columns
                        # is attempted; otherwise skip this attempt.
                        if len(colsRemoved) != len(colsAdded) or len(colsRemoved) > 2:
                            continue

                        # First attempt (repeat == 0) uses "edges" then
                        # "value"; the second attempt uses the reverse.
                        colsRedefinedTo = ["value", "edges"] if repeat == 1 else ["edges", "value"]

                        rearrangedColumns = []
                        # i indexes colsRedefinedTo, j indexes colsAdded.
                        i, j = (0, 0)
                        for col in subtypeColumns:
                            if col in colsRemoved:
                                # A missing subtype column is replaced by the
                                # next name from colsRedefinedTo.
                                rearrangedColumns.append(colsRedefinedTo[i])
                                i += 1
                            elif col in colsRedefinedTo:
                                # A subtype "value"/"edges" column present in
                                # the file is replaced by the next added column.
                                rearrangedColumns.append(colsAdded[j])
                                j += 1
                            else:
                                rearrangedColumns.append(col)

                        # Added columns not consumed above are appended in the
                        # order in which they occur in the file's columns.
                        for col in columns:
                            if col in colsAdded[j:]:
                                rearrangedColumns.append(col)
                    else:
                        # Remaining adherence levels: shared columns first, in
                        # the subtype's order, then the file's extra columns in
                        # their original order.
                        rearrangedColumns = [x for x in subtypeColumns if x in colSet] + [
                            x for x in columns if x not in subtypeColSet
                        ]
                    rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(rearrangedColumns)

                try:
                    # Compose a test file in memory from a deep copy of the
                    # source, leaving self._geSource itself untouched.
                    tempFile = StringIO()
                    self._composeContents(
                        tempFile,
                        rearrangedHbColumns,
                        rearrangedColumns,
                        deepcopy(self._geSource),
                        onlyNonDefault=True,
                        singleDataLine=True,
                    )

                    # Parse the composed text back as a GTrack file.
                    gtrackGESource = Gtrack(
                        "subtype.test." + self.getDefaultFileNameSuffix(),
                        printWarnings=False,
                        strToUseInsteadOfFn=tempFile.getvalue(),
                    )
                    # NOTE(review): not reached if composing or parsing raises,
                    # so the buffer is then left to garbage collection.
                    tempFile.close()

                    if gtrackGESource.compliesWithSubtype(subtypeUrl):
                        gtrackGESource._headerDict["subtype url"] = subtypeUrl
                        gtrackGESource._updateHeadersAccordingToSubtype()
                        # Keep only headers that differ from the GTrack defaults.
                        updatedHeaders = OrderedDict(
                            [
                                (key, val)
                                for key, val in gtrackGESource.getHeaderDict().iteritems()
                                if val != Gtrack.DEFAULT_HEADER_DICT.get(key)
                            ]
                        )
                        for header in updatedHeaders:
                            self._setHeaderDict(header, updatedHeaders[header])

                        return rearrangedHbColumns, rearrangedColumns, True
                except Exception, e:
                    # Best effort: any failure while composing, parsing or
                    # checking counts as "does not comply", and the search
                    # moves on to the next attempt.
                    continue