Example #1
0
def read_mat(file, target_chr = None):
    """Generator: yield the records of a mat file (e.g. hg18_genes.mat).

    Every non-blank line is split on runs of whitespace.  The fields read are:
    0 = part number, 1 = chromosome, 2 = start, 3 = finish, 5 = class name,
    6 = repeat name, and 8 onwards = the sparse matrix text, which is
    re-joined with single spaces and handed to sparse2matrix().

    Args:
        file: Path of the mat file to open.
        target_chr: When truthy, only lines whose chromosome field equals
            this value are yielded; otherwise every line is yielded.

    Yields:
        Tuples (partNo, start, finish, R) where partNo, start and finish are
        ints and R is an RptMatrix whose class_name, rep_name and M
        attributes have been filled in from the line.
    """
    # The context manager closes the file once the generator is exhausted,
    # closed, or garbage-collected, instead of leaking the handle.
    with open(file) as handle:
        for line in handle:
            stripped = line.rstrip()
            if not stripped:
                # Blank / whitespace-only lines (e.g. a trailing newline at
                # end of file) carry no record; skip them rather than crash.
                continue
            fields = re.split(r"\s+", stripped)
            if target_chr and fields[1] != target_chr:
                continue
            partNo = int(fields[0])
            start = int(fields[2])
            finish = int(fields[3])
            M = sparse2matrix(" ".join(fields[8:]))

            R = RptMatrix(None, None)
            R.class_name = fields[5]
            R.rep_name = fields[6]
            R.M = reduce_64matrix(M)
            yield partNo, start, finish, R
def FHK3(rpt_list, global_dist_file = None, tmp_file = None):
    """Calculate the fit by computing q and d values aggregated by family.

    q: Estimated by averaging the q over the families.
    d: Estimated for a family using the average over the instances of the family.

    The repeats in rpt_list are grouped by their class_name attribute.  For
    each distinct class_name a fresh RptMatrix(None, None) is created and the
    M attribute of every member repeat is added onto it with ``+=``.  The
    resulting per-family repeats are then passed to FHK1.

    Args:
        rpt_list: Iterable of repeat objects exposing class_name and M.
        global_dist_file: Not used by this function; kept so existing callers
            keep working.
        tmp_file: Not used by this function; kept so existing callers keep
            working.

    Returns:
        Whatever FHK1 returns for the list of per-family repeats.
    """

    # First: build one stand-in repeat per family whose matrix is the sum of
    # the matrices of all repeats in that family.
    # NOTE(review): this relies on RptMatrix(None, None) giving M an initial
    # value that supports ``+=`` with r.M -- confirm against RptMatrix.
    C_dic = {}
    for r in rpt_list:
        alpha = r.class_name
        if alpha not in C_dic:
            new_r = RptMatrix(None, None)
            new_r.class_name = alpha
            C_dic[alpha] = new_r
        C_dic[alpha].M += r.M

    # Materialise as a real list: on Python 3 dict.values() is a view that
    # cannot be indexed or sorted in place, which a list consumer may need.
    family_rpt_list = list(C_dic.values())

    # Now run the fit on the per-family repeat list.
    # NOTE(review): the original comment called this "algorithm 2" while the
    # code calls FHK1 -- confirm FHK1 is the intended routine.
    return FHK1(family_rpt_list)