Ejemplo n.º 1
0
def loadGTF(line, fp, fname, labels, regions, transcriptID, transcript_id_designator, defaultGroup):
    """
    Like loadBED, but for a GTF file

    This is largely a copy of what's in deeptoolsintervals

    Parameters:
        line: the first line of the file, already read by the caller.
        fp: iterable yielding the remaining lines (str or bytes).
        fname: name of the file; its basename is used to build the label
            handed to loadGTFtranscript.
        labels: dict mapping group label -> group index. Updated in place.
        regions: list of dicts, one per group, mapping a transcript name to
            its position within the group. Updated in place.
        transcriptID: value of the feature column (3rd column) that marks a
            transcript entry. Compared case-insensitively.
        transcript_id_designator: passed through to loadGTFtranscript.
        defaultGroup: passed through to loadGTFtranscript.

    Returns nothing; `labels` and `regions` are modified in place.
    """
    file_label = dti.findRandomLabel(labels, os.path.basename(fname))

    # Normalise once so the first line and every following line are matched
    # with exactly the same (case-insensitive) rule.
    wanted_feature = transcriptID.lower()

    def add_transcript(raw):
        # Add the transcript described by one raw line, if it is one.
        if not isinstance(raw, str):
            raw = raw.decode('ascii')
        if raw.startswith('#'):
            return
        cols = raw.strip().split('\t')
        # Blank or truncated lines have no feature column to inspect.
        if len(cols) < 3 or cols[2].lower() != wanted_feature:
            return
        label, name = loadGTFtranscript(cols, file_label, defaultGroup, transcript_id_designator)
        if label is None:
            # loadGTFtranscript signals an unusable entry with a None label.
            return
        if label not in labels:
            labels[label] = len(labels)
            regions.append(dict())
        labelIdx = labels[label]
        regions[labelIdx][name] = len(regions[labelIdx])

    # handle the first line, then everything still left in the file
    add_transcript(line)
    for line in fp:
        add_transcript(line)
Ejemplo n.º 2
0
def loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup):
    """
    Given a first line, possibly a label column and a list of labels and regions, add the labels and regions in the file to them

    Parameters:
        line: the first data line of the file, already read by the caller.
        fp: iterable yielding the remaining lines (str or bytes).
        fname: name of the file; its basename is the fallback group label.
        labelColumn: index (as used by list.pop) of the column holding the
            group label, or None when groups are delimited by "#" lines.
        labels: dict mapping group label -> group index. Updated in place.
        regions: list of dicts, one per group, mapping a region name to its
            position within the group. Updated in place.
        defaultGroup: label for a trailing group that has no "#" label line;
            when None the basename of `fname` is used instead.

    Returns nothing; `labels` and `regions` are modified in place.
    """

    # This is largely parseBED from deeptoolsintervals
    labelIdx = None
    localRegions = {}

    # Handle the first line. Decode like loadGTF does, in case bytes arrive.
    if not isinstance(line, str):
        line = line.decode('ascii')
    cols = line.strip().split("\t")
    if labelColumn is not None:
        label = cols.pop(labelColumn)
        if label not in labels:
            labels[label] = len(labels)
        labelIdx = labels[label]
        if labelIdx >= len(regions):
            regions.append(localRegions)
        else:
            localRegions = regions[labelIdx]

    if len(cols) >= 6:
        name = cols[3]
    else:
        name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
    # The group may already hold regions from an earlier call; do not
    # overwrite one of them, rename on collision as the loop below does.
    # NOTE(review): assumes dti.findRandomLabel returns a key that is not
    # yet present in the dict - confirm against deeptoolsintervals.
    if name in localRegions:
        name = dti.findRandomLabel(localRegions, name)
    localRegions[name] = len(localRegions)

    for line in fp:
        # fp may yield bytes (loadGTF guards against the same thing)
        if not isinstance(line, str):
            line = line.decode('ascii')

        if line.startswith("#"):
            # With a label column "#" lines are plain comments. Without one
            # they close the group collected so far and give it its label.
            if labelColumn is None and len(localRegions) > 0:
                label = line[1:].strip()
                if not label:
                    label = os.path.basename(fname)
                labels[dti.findRandomLabel(labels, label)] = len(labels)
                regions.append(localRegions)
                localRegions = dict()
            continue

        cols = line.strip().split("\t")
        if len(cols) < 3:
            continue
        if labelColumn is not None:
            label = cols.pop(labelColumn)
            if label not in labels:
                labels[label] = len(labels)
            labelIdx = labels[label]
            if labelIdx >= len(regions):
                regions.append({})
            localRegions = regions[labelIdx]

        if len(cols) >= 6:
            name = cols[3]
        else:
            name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
        name = dti.findRandomLabel(localRegions, name)
        localRegions[name] = len(localRegions)

    # Handle the last group if there is no label
    if labelIdx is None and len(localRegions) > 0:
        if defaultGroup is not None:
            labels[dti.findRandomLabel(labels, defaultGroup)] = len(labels)
        else:
            labels[dti.findRandomLabel(labels,
                                       os.path.basename(fname))] = len(labels)
        regions.append(localRegions)
def loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup):
    """
    Given a first line, possibly a label column and a list of labels and regions, add the labels and regions in the file to them

    Parameters:
        line: the first data line of the file, already read by the caller.
        fp: iterable yielding the remaining lines (str or bytes).
        fname: name of the file; its basename is the fallback group label.
        labelColumn: index (as used by list.pop) of the column holding the
            group label, or None when groups are delimited by "#" lines.
        labels: dict mapping group label -> group index. Updated in place.
        regions: list of dicts, one per group, mapping a region name to its
            position within the group. Updated in place.
        defaultGroup: label for a trailing group that has no "#" label line;
            when None the basename of `fname` is used instead.

    Returns nothing; `labels` and `regions` are modified in place.
    """

    # This is largely parseBED from deeptoolsintervals
    labelIdx = None
    localRegions = {}

    # Handle the first line. Decode like loadGTF does, in case bytes arrive.
    if not isinstance(line, str):
        line = line.decode('ascii')
    cols = line.strip().split("\t")
    if labelColumn is not None:
        label = cols.pop(labelColumn)
        if label not in labels:
            labels[label] = len(labels)
        labelIdx = labels[label]
        if labelIdx >= len(regions):
            regions.append(localRegions)
        else:
            localRegions = regions[labelIdx]

    if len(cols) >= 6:
        name = cols[3]
    else:
        name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
    # The group may already hold regions from an earlier call; do not
    # overwrite one of them, rename on collision as the loop below does.
    # NOTE(review): assumes dti.findRandomLabel returns a key that is not
    # yet present in the dict - confirm against deeptoolsintervals.
    if name in localRegions:
        name = dti.findRandomLabel(localRegions, name)
    localRegions[name] = len(localRegions)

    for line in fp:
        # fp may yield bytes (loadGTF guards against the same thing)
        if not isinstance(line, str):
            line = line.decode('ascii')

        if line.startswith("#"):
            # With a label column "#" lines are plain comments. Without one
            # they close the group collected so far and give it its label.
            if labelColumn is None and len(localRegions) > 0:
                label = line[1:].strip()
                if not label:
                    label = os.path.basename(fname)
                labels[dti.findRandomLabel(labels, label)] = len(labels)
                regions.append(localRegions)
                localRegions = dict()
            continue

        cols = line.strip().split("\t")
        if len(cols) < 3:
            continue
        if labelColumn is not None:
            label = cols.pop(labelColumn)
            if label not in labels:
                labels[label] = len(labels)
            labelIdx = labels[label]
            if labelIdx >= len(regions):
                regions.append({})
            localRegions = regions[labelIdx]

        if len(cols) >= 6:
            name = cols[3]
        else:
            name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
        name = dti.findRandomLabel(localRegions, name)
        localRegions[name] = len(localRegions)

    # Handle the last group if there is no label
    if labelIdx is None and len(localRegions) > 0:
        if defaultGroup is not None:
            labels[dti.findRandomLabel(labels, defaultGroup)] = len(labels)
        else:
            labels[dti.findRandomLabel(labels, os.path.basename(fname))] = len(labels)
        regions.append(localRegions)