def read_mat(file, target_chr = None):
    """Generator: return the contents of a mat file (e.g. hg18_genes.mat),
    optionally restricted to a given chromosome.

    Each non-blank line is split on runs of whitespace; the columns used are:
        0      part number (int)
        1      chromosome name (compared against target_chr)
        2, 3   start and finish coordinates (int)
        5      class name
        6      repeat name
        8...   sparse matrix text, re-joined with single spaces and handed
               to sparse2matrix

    Parameters:
        file:       path of the mat file to read.
        target_chr: if truthy, only lines whose column 1 equals this value
                    are yielded; otherwise every line is yielded.

    Yields:
        (partNo, start, finish, R) where R is an RptMatrix whose class_name,
        rep_name and M attributes were filled in from the line.  M is the
        result of reduce_64matrix applied to the parsed matrix.

    Raises:
        IndexError / ValueError for lines with too few columns or
        non-integer values in columns 0, 2 or 3.
    """
    splitter = re.compile(r"\s+")

    # The context manager closes the file when the generator is exhausted,
    # closed early, or garbage collected.
    with open(file) as wp:
        for line in wp:
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            A = splitter.split(line)
            if target_chr and A[1] != target_chr:
                continue

            partNo = int(A[0])
            start = int(A[2])
            finish = int(A[3])

            M = sparse2matrix(" ".join(A[8:]))
            R = RptMatrix(None, None)
            R.class_name = A[5]
            R.rep_name = A[6]
            R.M = reduce_64matrix(M)

            yield partNo, start, finish, R
def FHK3(rpt_list, global_dist_file = None, tmp_file = None):
    """Calculate the fit by computing q and d values aggregated by family.

    q: Estimated by averaging the q over the families.
    d: Estimated for a family using the average over the instances of the
       family.

    The repeats in rpt_list are grouped by their class_name.  For every
    distinct class_name one stand-in RptMatrix is created and the M of each
    member repeat is accumulated into it with +=.  The stand-ins are then
    passed to FHK1, whose result is returned unchanged.

    global_dist_file and tmp_file are accepted but not used here.
    """
    # Step 1: build one combined "fake" repeat per family.
    # NOTE(review): relies on RptMatrix(None, None) starting with an M that
    # supports += with r.M -- confirm against the RptMatrix constructor.
    merged_by_family = {}
    for rpt in rpt_list:
        family = rpt.class_name
        try:
            combined = merged_by_family[family]
        except KeyError:
            combined = RptMatrix(None, None)
            combined.class_name = family
            merged_by_family[family] = combined
        combined.M += rpt.M

    # Step 2: run the fit on the per-family repeats (the original comment
    # refers to this as "algorithm 2"; the call made is FHK1).
    return FHK1(merged_by_family.values())