Python seemsLikeGTF 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: deeptoolsintervals.parse

메소드/함수: seemsLikeGTF

hotexamples.com에서의 예제들: 2

Python seemsLikeGTF - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 deeptoolsintervals.parse.seemsLikeGTF에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

def sortMatrix(hm, regionsFileName, transcriptID, transcript_id_designator):
    """
    Iterate through the files noted by regionsFileName and sort hm accordingly

    hm is modified in place: hm.matrix.regions, hm.matrix.matrix,
    hm.matrix.group_labels, hm.matrix.group_boundaries and the
    "group_labels"/"group_boundaries" entries of hm.parameters are replaced
    so that they follow the group and region order found in the files.

    regionsFileName is an iterable of file names. Each file is handed to
    loadBED or loadGTF, which are expected to fill `labels` (group label ->
    group index) and `regions` (one {region name -> index} dict per group);
    that layout is inferred from how both are consumed below.
    transcriptID and transcript_id_designator are only passed on to loadGTF.

    Raises RuntimeError if the first data line of a file has fewer than 3
    columns. Calls sys.exit if a group from the files is missing in hm or if
    none of the regions of a group are present in hm.
    """

    labels = dict()
    regions = []
    defaultGroup = None
    if len(regionsFileName) == 1:
        defaultGroup = "genes"
    for fname in regionsFileName:
        fp = dti.openPossiblyCompressed(fname)
        # try/finally so the handle is released even if parsing fails
        try:
            line = dti.getNext(fp)
            labelColumn = None
            while line.startswith("#"):
                # Compare against None (as done below) so that a label
                # column of 0 is not mistaken for "not found yet"
                if labelColumn is None:
                    labelColumn = dti.getLabel(line)
                line = dti.getNext(fp)

            # The label column, if any, does not count as a data column
            subtract = 0
            if labelColumn is not None:
                subtract = 1

            # Determine the file type and load into a list (or list of lists)
            cols = line.strip().split("\t")
            if len(cols) - subtract < 3:
                raise RuntimeError(
                    '{0} does not seem to be a recognized file type!'.format(
                        fname))
            elif len(cols) - subtract <= 6:
                loadBED(line, fp, fname, labelColumn, labels, regions,
                        defaultGroup)
            elif len(cols) and dti.seemsLikeGTF(cols):
                loadGTF(line, fp, fname, labels, regions, transcriptID,
                        transcript_id_designator, defaultGroup)
            else:
                loadBED(line, fp, fname, labelColumn, labels, regions,
                        defaultGroup)
        finally:
            fp.close()

    # Do some sanity checking on the group labels and region names within them
    s1 = set(hm.parameters['group_labels'])
    for e in labels:
        if e not in s1:
            sys.exit(
                "The computeMatrix output is missing the '{}' region group. It has {} but the specified regions have {}.\n"
                .format(e, s1, labels.keys()))

    # Make a dictionary out of current labels and regions:
    # group label -> {region name -> row index in hm.matrix.regions}
    d = dict()
    for idx, label in enumerate(hm.parameters['group_labels']):
        s = hm.parameters['group_boundaries'][idx]
        e = hm.parameters['group_boundaries'][idx + 1]
        if label not in labels:
            continue
        d[label] = dict()
        # Use the absolute row index (starting at s) so that groups skipped
        # above do not shift the positions recorded for later groups
        for pos, reg in enumerate(hm.matrix.regions[s:e], start=s):
            d[label][reg[2]] = pos

    # Convert labels to an ordered list
    labelsList = [""] * len(labels)
    for k, v in labels.items():
        labelsList[v] = k

    # Reorder
    order = []
    boundaries = [0]
    for idx, label in enumerate(labelsList):
        # Make an ordered list out of the region names in this region group
        names = [""] * len(regions[idx])
        for k, v in regions[idx].items():
            names[v] = k
        sz = 0  # Track the number of entries actually matched
        for name in names:
            if name not in d[label]:
                sys.stderr.write(
                    "Skipping {}, due to being absent in the computeMatrix output.\n"
                    .format(name))
                continue
            sz += 1
            order.append(d[label][name])
        if sz == 0:
            sys.exit(
                "The region group {} had no matching entries!\n".format(label))
        # Boundaries are cumulative counts of the rows kept per group
        boundaries.append(sz + boundaries[-1])
    hm.matrix.regions = [hm.matrix.regions[i] for i in order]
    order = np.array(order)
    hm.matrix.matrix = hm.matrix.matrix[order, :]

    # Update the parameters
    hm.parameters["group_labels"] = labelsList
    hm.matrix.group_labels = labelsList
    hm.parameters["group_boundaries"] = boundaries
    hm.matrix.group_boundaries = boundaries

예제 #2

파일 보기

파일: computeMatrixOperations.py 프로젝트: venuthatikonda/deepTools

def sortMatrix(hm, regionsFileName, transcriptID, transcript_id_designator):
    """
    Iterate through the files noted by regionsFileName and sort hm accordingly

    hm is modified in place: hm.matrix.regions, hm.matrix.matrix,
    hm.matrix.group_labels, hm.matrix.group_boundaries and the
    "group_labels"/"group_boundaries" entries of hm.parameters are replaced
    so that they follow the group and region order found in the files.

    regionsFileName is an iterable of file names. Each file is handed to
    loadBED or loadGTF, which are expected to fill `labels` (group label ->
    group index) and `regions` (one {region name -> index} dict per group);
    that layout is inferred from how both are consumed below.
    transcriptID and transcript_id_designator are only passed on to loadGTF.

    Raises RuntimeError if the first data line of a file has fewer than 3
    columns. Calls sys.exit if a group from the files is missing in hm or if
    none of the regions of a group are present in hm.
    """

    labels = dict()
    regions = []
    defaultGroup = None
    if len(regionsFileName) == 1:
        defaultGroup = "genes"
    for fname in regionsFileName:
        fp = dti.openPossiblyCompressed(fname)
        # try/finally so the handle is released even if parsing fails
        try:
            line = dti.getNext(fp)
            labelColumn = None
            while line.startswith("#"):
                # Compare against None (as done below) so that a label column of 0 is not mistaken for "not found yet"
                if labelColumn is None:
                    labelColumn = dti.getLabel(line)
                line = dti.getNext(fp)

            # The label column, if any, does not count as a data column
            subtract = 0
            if labelColumn is not None:
                subtract = 1

            # Determine the file type and load into a list (or list of lists)
            cols = line.strip().split("\t")
            if len(cols) - subtract < 3:
                raise RuntimeError('{0} does not seem to be a recognized file type!'.format(fname))
            elif len(cols) - subtract <= 6:
                loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup)
            elif len(cols) and dti.seemsLikeGTF(cols):
                loadGTF(line, fp, fname, labels, regions, transcriptID, transcript_id_designator, defaultGroup)
            else:
                loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup)
        finally:
            fp.close()

    # Do some sanity checking on the group labels and region names within them
    s1 = set(hm.parameters['group_labels'])
    for e in labels:
        if e not in s1:
            sys.exit("The computeMatrix output is missing the '{}' region group. It has {} but the specified regions have {}.\n".format(e, s1, labels.keys()))

    # Make a dictionary out of current labels and regions:
    # group label -> {region name -> row index in hm.matrix.regions}
    d = dict()
    for idx, label in enumerate(hm.parameters['group_labels']):
        s = hm.parameters['group_boundaries'][idx]
        e = hm.parameters['group_boundaries'][idx + 1]
        if label not in labels:
            continue
        d[label] = dict()
        # Use the absolute row index (starting at s) so that groups skipped above do not shift the positions of later groups
        for pos, reg in enumerate(hm.matrix.regions[s:e], start=s):
            d[label][reg[2]] = pos

    # Convert labels to an ordered list
    labelsList = [""] * len(labels)
    for k, v in labels.items():
        labelsList[v] = k

    # Reorder
    order = []
    boundaries = [0]
    for idx, label in enumerate(labelsList):
        # Make an ordered list out of the region names in this region group
        names = [""] * len(regions[idx])
        for k, v in regions[idx].items():
            names[v] = k
        sz = 0  # Track the number of entries actually matched
        for name in names:
            if name not in d[label]:
                sys.stderr.write("Skipping {}, due to being absent in the computeMatrix output.\n".format(name))
                continue
            sz += 1
            order.append(d[label][name])
        if sz == 0:
            sys.exit("The region group {} had no matching entries!\n".format(label))
        # Boundaries must count the rows actually kept in `order`, not the original group size, or they drift whenever a region is skipped
        boundaries.append(sz + boundaries[-1])
    hm.matrix.regions = [hm.matrix.regions[i] for i in order]
    order = np.array(order)
    hm.matrix.matrix = hm.matrix.matrix[order, :]

    # Update the parameters
    hm.parameters["group_labels"] = labelsList
    hm.matrix.group_labels = labelsList
    hm.parameters["group_boundaries"] = boundaries
    hm.matrix.group_boundaries = boundaries