def _commonComplementGtrackFile(origFn, dbFn, intersectingFactor, gtrackColsToAdd, genome):
    """
    Compose a GTrack file from origFn, complemented with columns from dbFn.

    Both files are opened as GtrackGenomeElementSource objects. The db file is
    loaded into a lookup structure selected by intersectingFactor, the
    original source is wrapped in an ElementComplementer together with that
    lookup structure, and the result is composed and passed on to
    expandHeadersOfGtrackFileAndReturnComposer.

    Parameters:
        origFn: file name of the GTrack file to be complemented.
        dbFn: file name of the GTrack file the extra columns are taken from.
        intersectingFactor: 'id' (uses IdFullInfoDict) or 'position'
            (uses TupleFullInfoDict).
        gtrackColsToAdd: container of GTrack column names to add. If it
            contains 'value' or 'edges', the matching type and dimension
            headers of the db file are forced onto the composed file.
        genome: genome passed to both element sources and to the final
            header expansion.

    Returns:
        The return value of expandHeadersOfGtrackFileAndReturnComposer, called
        with the composed contents as strToUseInsteadOfFn.

    Raises:
        ShouldNotOccurError: if intersectingFactor is neither 'id' nor
            'position'.
    """
    origGESource = GtrackGenomeElementSource(origFn, genome)
    dbGESource = GtrackGenomeElementSource(dbFn, genome)
    dbPrefixes = dbGESource.getPrefixList()

    if intersectingFactor == 'id':
        fullDbDict = IdFullInfoDict(dbGESource, dbPrefixes)
    elif intersectingFactor == 'position':
        fullDbDict = TupleFullInfoDict(dbGESource, dbPrefixes)
    else:
        # Must be raised explicitly: merely naming the exception class is a
        # no-op, which would leave fullDbDict unbound and fail further down
        # with an unrelated UnboundLocalError.
        raise ShouldNotOccurError()

    # Headers of the db file that have to follow the columns copied from it.
    forcedHeaderDict = {}
    dbHeaderDict = dbGESource.getHeaderDict()

    if 'value' in gtrackColsToAdd:
        forcedHeaderDict['value type'] = dbHeaderDict['value type']
        forcedHeaderDict['value dimension'] = dbHeaderDict['value dimension']

    if 'edges' in gtrackColsToAdd:
        forcedHeaderDict['edge weight type'] = dbHeaderDict['edge weight type']
        forcedHeaderDict['edge weight dimension'] = dbHeaderDict['edge weight dimension']

    # The composer class is chosen from the kind of the original file.
    composerCls = ExtendedGtrackComposer if origGESource.isExtendedGtrackFile() else StdGtrackComposer
    complementedSource = ElementComplementer(origGESource, fullDbDict, gtrackColsToAdd)
    composedFile = composerCls(complementedSource, forcedHeaderDict=forcedHeaderDict).returnComposed()

    return expandHeadersOfGtrackFileAndReturnComposer('', genome, strToUseInsteadOfFn=composedFile)
def _determineIfFileCompliesToSubtypes(self, hbColumns, columns):
    """
    Search for a GTrack subtype that the composed file complies with.

    Candidate subtype URLs are taken from the forced 'subtype url' header if
    one is set (an empty string gives no candidates); otherwise from
    self.GTRACK_PRIORITIZED_SUBTYPE_LIST, tried in list order. For each
    candidate the columns are arranged according to the subtype's
    'subtype adherence' header, a one-data-line test file is composed in
    memory and parsed with Gtrack, and the parsed source is asked whether it
    complies with the subtype.

    Parameters:
        hbColumns: column names matching `columns`; used unchanged when the
            subtype adherence is 'reorderable' or 'free', otherwise
            recomputed with self._getHbColumnsFromGtrackColumns.
        columns: sequence of GTrack column names of the file being composed.

    Returns:
        (rearrangedHbColumns, rearrangedColumns, True) for the first
        candidate that complies. If no candidate complies, control leaves the
        loops without reaching that return statement.

    Side effects:
        Calls self._setHeaderDict for '1-indexed' and 'end inclusive' on
        every attempt (also attempts that fail) and, on success, for every
        header of the test source that differs from
        Gtrack.DEFAULT_HEADER_DICT.
    """
    if "subtype url" in self._forcedHeaderDict:
        # A forced subtype is the only candidate; an empty string disables
        # the search altogether.
        subtypeUrlList = (
            [self._forcedHeaderDict["subtype url"]]
            if self._forcedHeaderDict["subtype url"] != ""
            else []
        )
    else:
        subtypeUrlList = self.GTRACK_PRIORITIZED_SUBTYPE_LIST
    for subtypeUrl in subtypeUrlList:
        subtypeGESource = Gtrack.getSubtypeGESource(subtypeUrl)
        subtypeColumns = subtypeGESource.getColumns(orig=False)
        subtypeHeaders = subtypeGESource.getHeaderDict()
        # 'redefinable' subtypes get two attempts, one per ordering of
        # colsRedefinedTo below.
        numRepeats = 2 if subtypeHeaders["subtype adherence"] == "redefinable" else 1
        for repeat in range(numRepeats):
            # Take over the coordinate conventions of the candidate subtype
            # before composing the test file.
            self._setHeaderDict("1-indexed", subtypeHeaders["1-indexed"])
            self._setHeaderDict("end inclusive", subtypeHeaders["end inclusive"])
            if subtypeHeaders["subtype adherence"] in ["reorderable", "free"]:
                # The column order is kept as given.
                rearrangedColumns = columns
                rearrangedHbColumns = hbColumns
            else:
                colSet = set(columns)
                subtypeColSet = set(subtypeColumns)
                if subtypeHeaders["subtype adherence"] == "redefinable":
                    # Columns only in the subtype vs. columns only in this
                    # file. Both lists come from sets, so their order follows
                    # set iteration order.
                    colsRemoved = list(subtypeColSet - colSet)
                    colsAdded = list(colSet - subtypeColSet)
                    if len(colsRemoved) != len(colsAdded) or len(colsRemoved) > 2:
                        # Skips this attempt (continues the `repeat` loop).
                        continue
                    colsRedefinedTo = ["value", "edges"] if repeat == 1 else ["edges", "value"]
                    rearrangedColumns = []
                    # i indexes colsRedefinedTo, j indexes colsAdded.
                    i, j = (0, 0)
                    for col in subtypeColumns:
                        if col in colsRemoved:
                            # A subtype column missing from this file is
                            # replaced by the next name in colsRedefinedTo.
                            rearrangedColumns.append(colsRedefinedTo[i])
                            i += 1
                        elif col in colsRedefinedTo:
                            # NOTE(review): colsAdded[j] is indexed outside
                            # the try block below, so an IndexError here
                            # would propagate to the caller - confirm that j
                            # cannot exceed len(colsAdded) - 1.
                            rearrangedColumns.append(colsAdded[j])
                            j += 1
                        else:
                            rearrangedColumns.append(col)
                    # Append the added columns not consumed above, in the
                    # order they occur in `columns`.
                    for col in columns:
                        if col in colsAdded[j:]:
                            rearrangedColumns.append(col)
                else:
                    # Any other adherence value: shared columns first, in
                    # subtype order, then the remaining columns in their
                    # original order.
                    rearrangedColumns = [x for x in subtypeColumns if x in colSet] + [
                        x for x in columns if x not in subtypeColSet
                    ]
                rearrangedHbColumns = self._getHbColumnsFromGtrackColumns(rearrangedColumns)
            try:
                # Compose a minimal file (non-default headers, a single data
                # line) into memory from a deep copy of the source, then
                # parse it back.
                tempFile = StringIO()
                self._composeContents(
                    tempFile,
                    rearrangedHbColumns,
                    rearrangedColumns,
                    deepcopy(self._geSource),
                    onlyNonDefault=True,
                    singleDataLine=True,
                )
                gtrackGESource = Gtrack(
                    "subtype.test." + self.getDefaultFileNameSuffix(),
                    printWarnings=False,
                    strToUseInsteadOfFn=tempFile.getvalue(),
                )
                # Not reached if composing or parsing raised.
                tempFile.close()
                if gtrackGESource.compliesWithSubtype(subtypeUrl):
                    gtrackGESource._headerDict["subtype url"] = subtypeUrl
                    gtrackGESource._updateHeadersAccordingToSubtype()
                    # Keep only the headers whose value differs from the
                    # GTrack default.
                    updatedHeaders = OrderedDict(
                        [
                            (key, val)
                            for key, val in gtrackGESource.getHeaderDict().iteritems()
                            if val != Gtrack.DEFAULT_HEADER_DICT.get(key)
                        ]
                    )
                    for header in updatedHeaders:
                        self._setHeaderDict(header, updatedHeaders[header])
                    return rearrangedHbColumns, rearrangedColumns, True
            except Exception, e:
                # Any failure while composing, parsing or checking compliance
                # counts as "does not comply"; move on to the next attempt.
                continue