def read_lgn(filepath):
    """
    Read the edges of an LGN from a CSV file.

    The first row of the file is treated as a header and skipped. For
    every following row, the first two columns are passed through
    helper.swap() (presumably to put the gene pair in a canonical
    order -- confirm against helper) and stored as a two-element list.

    :param filepath: path of the CSV file to read
    :return: tuple (genes_in_lgn, edges_in_lgn), where edges_in_lgn is a
             list of [gene_1, gene_2] lists and genes_in_lgn is whatever
             helper.genes_from_edges() derives from those edges
    :raises IndexError: if a data row has fewer than two columns
    """
    edges_in_lgn = []
    with open(filepath) as lgn_file:
        lgn_reader = csv.reader(lgn_file)
        # Skip the header row. The builtin next() with a default works on
        # both Python 2.6+ and Python 3, and does not raise StopIteration
        # when the file is empty.
        next(lgn_reader, None)
        for row in lgn_reader:
            gene_1, gene_2 = helper.swap(row[0], row[1])
            edges_in_lgn.append([gene_1, gene_2])
    genes_in_lgn = helper.genes_from_edges(edges_in_lgn)
    return genes_in_lgn, edges_in_lgn
def __connection_in_extra_genes_step_0(self, block):
    """
    Build the initial control table for the 'extra genes' of a block.

    An 'extra gene' is a gene that occurs in the edges of `block` but is
    not one of self.genes_in_lgn. Each extra gene is mapped to a list of
    len(self.genes_in_lgn) + 2 counters: slot 0 (c[0]) is set to 1 to
    record that the gene appears in this block, all other slots start
    at 0.

    :param block: edges of the block, in the form accepted by
                  helper.genes_from_edges()
    :return: dict mapping each extra gene to its counter list
    """
    lgn_genes = self.genes_in_lgn
    row_width = len(lgn_genes) + 2
    block_genes = helper.genes_from_edges(block)
    outsiders = set(block_genes) - set(lgn_genes)

    # Only presence matters for c[0], not how many times the gene occurs
    # in the block, hence the set above.
    # c[1] (number of connections with the LGN) and c[2] (number of
    # connections with the subLGN) stay 0 here and are calculated later.
    return {gene: [1] + [0] * (row_width - 1) for gene in outsiders}
import os
import csv
import pdb
import logging

import helper


def read_lgn(filepath):
    """
    Read the edges of an LGN from a semicolon-delimited CSV file.

    The first row of the file is treated as a header and skipped. For
    every following row, the first two columns are passed through
    helper.swap() (presumably to put the gene pair in a canonical
    order -- confirm against helper) and stored as a two-element list.
    A row that triggers an IndexError (e.g. fewer than two columns) is
    reported with a logging warning and skipped.

    :param filepath: path of the ';'-delimited file to read
    :return: tuple (genes_in_lgn, edges_in_lgn), where edges_in_lgn is a
             list of [gene_1, gene_2] lists and genes_in_lgn is whatever
             helper.genes_from_edges() derives from those edges
    """
    edges_in_lgn = []
    with open(filepath) as lgn_file:
        lgn_reader = csv.reader(lgn_file, delimiter=';')
        # Skip the header row. The builtin next() with a default works on
        # both Python 2.6+ and Python 3, and does not raise StopIteration
        # when the file is empty.
        next(lgn_reader, None)
        for row in lgn_reader:
            try:
                gene_1, gene_2 = helper.swap(row[0], row[1])
                edges_in_lgn.append([gene_1, gene_2])
            except IndexError:
                # Malformed rows are best-effort skipped, not fatal.
                logging.warning("%s is not in the correct format", row)
    genes_in_lgn = helper.genes_from_edges(edges_in_lgn)
    return genes_in_lgn, edges_in_lgn