コード例 #1
0
def segment_matrix(matrix: List[Path], bin_width, cells_per_file,
                   pangenome_length) -> PangenomeSchematic:
    """Build a PangenomeSchematic of Components and LinkColumns from Paths.

    Components are cut at the positions returned by
    ``dividers_with_max_size``. Each component then receives one arrival
    LinkColumn per entry stored under its first bin in ``incoming`` and one
    departure LinkColumn per entry stored under its last bin in ``outgoing``.
    Finally every consecutive pair of components is handed to
    ``add_adjacent_connector_column``.

    :param matrix: Paths to segment; their ``name`` attributes are passed to
        the PangenomeSchematic constructor in this order.
    :param bin_width: forwarded unchanged to PangenomeSchematic.
    :param cells_per_file: forwarded to ``dividers_with_max_size``.
    :param pangenome_length: forwarded unchanged to PangenomeSchematic.
    :return: the populated PangenomeSchematic.
    """
    from matrixcomponent import JSON_VERSION
    print(f"Starting Segmentation process on {len(matrix)} Paths.")
    names = [path.name for path in matrix]
    schematic = PangenomeSchematic(JSON_VERSION, bin_width, 1, 1, [], names,
                                   1, pangenome_length)
    incoming, outgoing, dividers = dividers_with_max_size(
        matrix, cells_per_file)

    # Every non-zero divider closes the component that began at the
    # previous divider (or at bin 0).
    segment_start = 0
    for divider in dividers:
        if divider != 0:
            # current.active_members = 1
            schematic.components.append(Component(segment_start, divider - 1))
        segment_start = divider
    print(f"Created {len(schematic.components)} components")

    # populate Component occupancy per Path
    populate_component_matrix(matrix, schematic)

    def membership_row(participants):
        # phase dots depend on row ordering of path names, optimized for
        # display: one boolean per schematic path name.
        return [name in participants for name in schematic.path_names]

    # populate all link columns onto schematic
    link_column_total = 0
    for component in schematic.components:
        # TODO: order columns based on traversal patterns,
        # TODO: insert additional columns for higher copy number
        arrivals_here = incoming[component.first_bin]
        for origin_pos, participants in arrivals_here.items():
            component.arrivals.append(
                LinkColumn(origin_pos,
                           component.first_bin,
                           participants=membership_row(participants)))
            link_column_total += 1

        departures_here = outgoing[component.last_bin]
        for arriving_pos, participants in departures_here.items():
            component.departures.append(
                LinkColumn(component.last_bin,
                           arriving_pos,
                           participants=membership_row(participants)))
            link_column_total += 1

    # Connect each component to its immediate successor.
    for right in range(1, len(schematic.components)):
        add_adjacent_connector_column(schematic.components[right - 1],
                                      schematic.components[right], schematic)

    print(f"Created {link_column_total} LinkColumns")

    return schematic
コード例 #2
0
def add_adjacent_connector_column(component, next_component, schematic):
    """Append the LinkColumn that joins ``component`` to its neighbour.

    The appended column is always the last entry of ``component.departures``
    and runs from ``component.last_bin`` to ``component.last_bin + 1``.
    Its participants are the row ids found in the occupants of both
    components, minus every id already listed as a participant of an existing
    departure column (the simple subtractive case; multiple-copy cases are
    not handled here). If either component is falsy, no rows are shared.
    """
    row_ids = np.arange(len(schematic.path_names))
    if component and next_component:
        shared = component.occupants & next_component.occupants
    else:
        shared = []
    candidate_rows = np.asarray([row_ids[j] for j in shared])

    if len(candidate_rows) == 0 or len(component.departures) == 0:
        # nothing to subtract: every shared row gets an adjacent connector
        adjacent_rows = candidate_rows
    else:
        # rows that already leave through some departure column are excluded
        departed_rows = np.concatenate(
            [link.participants for link in component.departures])
        keep = np.isin(candidate_rows, departed_rows, invert=True)
        adjacent_rows = candidate_rows[keep]

    # The column is appended even when it has no participants, so it acts
    # as a placeholder.
    participants = np.asarray(adjacent_rows).astype('int32')
    adjacent_link = LinkColumn(  # LinkColumn for adjacents
        component.last_bin,
        component.last_bin + 1,
        participants=participants)
    component.departures.append(adjacent_link)
コード例 #3
0
def segment_matrix(matrix: List[Path], bin_width, cells_per_file,
                   pangenome_length) -> PangenomeSchematic:
    """Build a PangenomeSchematic of Components and LinkColumns from Paths.

    Components are cut at the positions returned by
    ``dividers_with_max_size`` and indexed by their first and last bin.
    For every group reported by ``utils.find_groups`` one LinkColumn is
    created; it is appended to the departures of the component whose last
    bin equals the link source and to the arrivals of the component whose
    first bin equals the link destination, when such components exist.
    Finally every consecutive pair of components is handed to
    ``add_adjacent_connector_column``.

    :param matrix: Paths to segment; their ``name`` attributes are passed to
        the PangenomeSchematic constructor in this order.
    :param bin_width: forwarded unchanged to PangenomeSchematic.
    :param cells_per_file: forwarded to ``dividers_with_max_size``.
    :param pangenome_length: forwarded unchanged to PangenomeSchematic.
    :return: the populated PangenomeSchematic.
    """
    from matrixcomponent import JSON_VERSION
    print(f"Starting Segmentation process on {len(matrix)} Paths.")
    names = [path.name for path in matrix]
    schematic = PangenomeSchematic(JSON_VERSION, bin_width, 1, 1, [], names,
                                   1, pangenome_length)
    connections, dividers = dividers_with_max_size(matrix, cells_per_file)

    # Lookup tables so a link endpoint can be matched to the component that
    # starts or ends exactly on that bin.
    starts_at = {}
    ends_at = {}
    segment_start = 0
    for divider in dividers:
        if divider != 0:
            segment_end = divider - 1
            # current.active_members = 1
            piece = Component(segment_start, segment_end)
            schematic.components.append(piece)
            starts_at[segment_start] = piece
            ends_at[segment_end] = piece
        segment_start = divider
    print(f"Created {len(schematic.components)} components")

    # populate Component occupancy per Path
    populate_component_matrix(matrix, schematic)

    table = connections.to_numpy()
    # presumably each (start, end) group spans rows sharing one
    # (source, destination) pair -- confirm against utils.find_groups
    groups = utils.find_groups(table[:, :2])
    path_indices = connections.path_index.to_numpy()

    # One reusable boolean row, cleared and refilled for every group.
    row_mask = np.zeros(len(schematic.path_names), dtype=bool)

    link_column_total = 0
    for group_start, group_end in groups:
        first_row = table[group_start]
        src = int(first_row[0])
        dst = int(first_row[1])

        row_mask.fill(False)
        row_mask[path_indices[group_start:group_end]] = True
        link = LinkColumn(src, dst, participants=row_mask.tolist())

        # The same LinkColumn object is shared by both endpoints, and each
        # attachment is counted separately.
        leaving_from = ends_at.get(src)
        if leaving_from:
            leaving_from.departures.append(link)
            link_column_total += 1

        arriving_at = starts_at.get(dst)
        if arriving_at:
            arriving_at.arrivals.append(link)
            link_column_total += 1

    # Connect each component to its immediate successor.
    for right in range(1, len(schematic.components)):
        add_adjacent_connector_column(schematic.components[right - 1],
                                      schematic.components[right], schematic)

    print(f"Created {link_column_total} LinkColumns")

    return schematic
コード例 #4
0
def add_adjacent_connector_column(component, next_component, schematic):
    """Append the LinkColumn that joins ``component`` to its neighbour.

    The appended column is always the last entry of ``component.departures``
    and runs from ``component.last_bin`` to ``component.last_bin + 1``.
    Its participants hold one boolean per schematic path name: True when the
    row is occupied in both components and takes none of the existing
    departure columns (the simple subtractive case; multiple-copy cases are
    not handled here).
    """

    def stays_adjacent(row):
        # Rows missing from either component never get a connector.
        if not (component.occupants[row] and next_component.occupants[row]):
            return False
        # occupant present: connected only if it didn't depart elsewhere
        departed = sum(
            [link.participants[row] for link in component.departures])
        return not departed

    row_count = len(schematic.path_names)
    adjacents = [stays_adjacent(row) for row in range(row_count)]

    adjacent_link = LinkColumn(  # LinkColumn for adjacents
        component.last_bin,
        component.last_bin + 1,
        participants=adjacents)
    component.departures.append(adjacent_link)
コード例 #5
0
def segment_matrix(matrix: List[Path], bin_width, cells_per_file,
                   pangenome_length, no_adjacent_links,
                   parallel) -> PangenomeSchematic:
    """Build a PangenomeSchematic of Components and LinkColumns from Paths.

    Components are cut at the positions returned by
    ``dividers_with_max_size`` and indexed by their first and last bin.
    For every group reported by ``utils.find_groups`` one LinkColumn is
    created; it is appended to the departures of the component whose last
    bin equals the link source and to the arrivals of the component whose
    first bin equals the link destination, when such components exist.
    Unless ``no_adjacent_links`` is set, adjacent connector columns are then
    added for every consecutive pair of components and for the last
    component. ``schematic.prerender()`` is called before returning.

    :param matrix: Paths to segment; their ``name`` attributes are passed to
        the PangenomeSchematic constructor in this order.
    :param bin_width: forwarded unchanged to PangenomeSchematic.
    :param cells_per_file: forwarded to ``dividers_with_max_size``.
    :param pangenome_length: forwarded unchanged to PangenomeSchematic.
    :param no_adjacent_links: when truthy, skip the adjacent connector
        columns; its negation is also passed to PangenomeSchematic.
    :param parallel: accepted for interface compatibility; not used by this
        function.
    :return: the populated PangenomeSchematic.
    """
    from matrixcomponent import JSON_VERSION
    LOGGER.info(f"Starting Segmentation process on {len(matrix)} Paths.")
    schematic = PangenomeSchematic(JSON_VERSION, bin_width, 1, 1,
                                   not no_adjacent_links, [],
                                   [p.name for p in matrix], 1,
                                   pangenome_length)
    connections, dividers = dividers_with_max_size(matrix, cells_per_file)
    LOGGER.info("Created dividers")

    # Lookup tables so a link endpoint can be matched to the component that
    # starts or ends exactly on that bin.
    component_by_first_bin = {}
    component_by_last_bin = {}
    start_pos = 0
    for valid_start in dividers:
        if valid_start != 0:
            current = Component(start_pos, valid_start - 1)
            schematic.components.append(current)
            component_by_first_bin[start_pos] = current
            component_by_last_bin[valid_start - 1] = current
        start_pos = valid_start
    LOGGER.info(f"Created {len(schematic.components)} components")

    # populate Component occupancy per Path
    populate_component_matrix(matrix, schematic)
    LOGGER.info("populated matrix")

    path_indices = connections['path_index']
    connections_from = connections['from']
    connections_to = connections['to']
    # Consecutive entries of ``groups`` are used as [start, end) row ranges;
    # presumably each range shares one (from, to) pair -- confirm against
    # utils.find_groups.
    groups = utils.find_groups(connections_from, connections_to)

    for group_idx in range(len(groups) - 1):
        start, end = groups[group_idx], groups[group_idx + 1]
        src, dst = int(connections_from[start]), int(
            connections_to[start])  # important to cast to int()

        link_column = LinkColumn(src,
                                 dst,
                                 participants=path_indices[start:end])

        # dict.get() returns None for bins that do not bound a component;
        # the same LinkColumn object is shared by both endpoints.
        src_component = component_by_last_bin.get(src)
        dst_component = component_by_first_bin.get(dst)

        if src_component is not None:
            src_component.departures.append(link_column)

        if dst_component is not None:
            dst_component.arrivals.append(link_column)

    components = schematic.components
    # Guard against an empty component list: components[-1] would raise
    # IndexError when the dividers produced no component at all.
    if not no_adjacent_links and len(components) > 0:
        for component, next_component in zip(components, components[1:]):
            add_adjacent_connector_column(component, next_component, schematic)
        # add special case connectors for the last component in the file
        add_adjacent_connector_column(components[-1], None, schematic)

    num_link_columns = sum(
        len(comp.departures) + len(comp.arrivals)
        for comp in schematic.components)
    LOGGER.info(f"Created {num_link_columns} LinkColumns")

    schematic.prerender()

    return schematic