Beispiel #1
0
def getRunSeq():
    """
    Generate the run sequence: a list of index pairs into the two
    secondary-structure profile lists, used for job scheduling.

    Reads "ss_profiles.pickle" and the first route file that exists among
    "contacts_route.pickle", "pcs_route.pickle" and "rdc_route.pickle"
    (checked in that order) from the current working directory. The first
    route entry supplies the two elements handed to getPairSSProfiles.

    :return: list of [i, j] pairs covering every combination of an index
        into the first profile list with an index into the second, or
        False when the profile file or all of the route files are missing
    """
    # TODO needed to generalize based on input data type
    if not os.path.isfile("ss_profiles.pickle"):
        return False

    route_file = None
    for candidate in ("contacts_route.pickle", "pcs_route.pickle",
                      "rdc_route.pickle"):
        if os.path.isfile(candidate):
            route_file = candidate
            break
    if route_file is None:
        # Without a route there is nothing to schedule; signal it the same
        # way as a missing profile file rather than failing on an unbound
        # name further down.
        return False

    ss_profiles = io.readPickle("ss_profiles.pickle")
    map_route = io.readPickle(route_file)
    print(map_route)
    s1, s2 = map_route[0][0], map_route[0][1]
    s1_list, s2_list = getPairSSProfiles(s1, s2, ss_profiles)

    return [[i, j]
            for i in range(len(s1_list))
            for j in range(len(s2_list))]
Beispiel #2
0
def getSSlist():
    """
    Return the two secondary-structure profile lists of the first route
    entry.

    Reads "ss_profiles.pickle" and the first route file that exists among
    "contacts_route.pickle", "pcs_route.pickle" and "rdc_route.pickle"
    (checked in that order) from the current working directory, then asks
    getPairSSProfiles for the profiles of the first route entry's two
    elements.

    :return: tuple (s1_list, s2_list) as produced by getPairSSProfiles
    :raises IOError: if none of the route files exists
    """
    route_file = None
    for candidate in ("contacts_route.pickle", "pcs_route.pickle",
                      "rdc_route.pickle"):
        if os.path.isfile(candidate):
            route_file = candidate
            break
    if route_file is None:
        # Fail with a message that names the missing input instead of an
        # UnboundLocalError on map_route.
        raise IOError("no route file found (looked for contacts_route.pickle, "
                      "pcs_route.pickle, rdc_route.pickle)")

    ss_profiles = io.readPickle("ss_profiles.pickle")
    map_route = io.readPickle(route_file)

    s1, s2 = map_route[0][0], map_route[0][1]
    s1_list, s2_list = getPairSSProfiles(s1, s2, ss_profiles)
    return s1_list, s2_list
Beispiel #3
0
def start_top_hits(num_hits, stage, smotif_index):
    """
    generate run seq, a seq list of pairs of
    indexes of profiles for job scheduling
    """
    map_route = []
    ss_profiles = io.readPickle("ss_profiles.pickle")
    if os.path.isfile("contacts_route.pickle"):
        map_route = io.readPickle("contacts_route.pickle")
    elif os.path.isfile("pcs_route.pickle"):
        map_route = io.readPickle("pcs_route.pickle")
    elif os.path.isfile("rdc_route.pickle"):
        map_route = io.readPickle("rdc_route.pickle")
        map_route_alt = io.readPickle("rdc_route_alt.pickle")

    alt_smotif_defs = map_route_alt[smotif_index]

    top_hits = []
    top_hit_file = str(smotif_index - 1) + "_refined_tophits.gzip"
    if os.path.isfile(top_hit_file):
        top_hits = io.readGzipPickle(top_hit_file)

        print "loading from prevously assembled refined_tophits.pickle file"
        print "# hits :", len(top_hits)
    else:
        top_hit_file = str(smotif_index - 1) + "_tophits.gzip"
        if os.path.isfile(top_hit_file):
            top_hits = io.readGzipPickle(top_hit_file)
            print "loading from prevously assembled tophits.pickle file"
            print "# hits :", len(top_hits)
        else:
            print "No previous tophits file found, Generating a new one"
            return "exception"

    if not top_hits:
        return False, False

    run_seq = []
    for next_smotif in alt_smotif_defs:
        print next_smotif
        direction = next_smotif[-1]
        if direction == 'left':
            next_ss_list = ss_profiles[next_smotif[0]]
        else:
            next_ss_list = ss_profiles[next_smotif[1]]

        for i in range(len(top_hits)):
            for j in range(len(next_ss_list)):
                run_seq.append([i, j, next_smotif])

    return run_seq, smotif_index
Beispiel #4
0
def getPreviousSmotif(index):
    """
    Return one top hit of the stage preceding the next Smotif.

    Loads the first route file that exists among "contacts_route.pickle",
    "pcs_route.pickle" and "rdc_route.pickle" (checked in that order), asks
    getNextSmotif for the next index, and reads
    "<next_index - 1>_tophits.pickle".

    :param index: position of the wanted hit in the pickled top-hit list
    :return: the hit stored at that position
    :raises IOError: if none of the route files exists
    """
    for route_file in ("contacts_route.pickle", "pcs_route.pickle",
                       "rdc_route.pickle"):
        if os.path.isfile(route_file):
            map_route = io.readPickle(route_file)
            break
    else:
        # Fail with a message that names the missing input instead of an
        # UnboundLocalError on map_route.
        raise IOError("no route file found (looked for contacts_route.pickle, "
                      "pcs_route.pickle, rdc_route.pickle)")

    next_index, _ = getNextSmotif(map_route)
    # Read in previous index hits
    top_hits = io.readPickle(str(next_index - 1) + "_tophits.pickle")
    return top_hits[index]
Beispiel #5
0
def getRunSeq(num_hits, stage):
    """
    generate run seq, a seq list of pairs of
    indexes of profiles for job scheduling
    """
    map_route = []
    ss_profiles = io.readPickle("ss_profiles.pickle")
    if os.path.isfile("contacts_route.pickle"):
        map_route = io.readPickle("contacts_route.pickle")
    elif os.path.isfile("pcs_route.pickle"):
        map_route = io.readPickle("pcs_route.pickle")
    elif os.path.isfile("rdc_route.pickle"):
        map_route = io.readPickle("rdc_route.pickle")

    try:
        next_index, next_smotif = getNextSmotif(map_route)
        print next_index, next_smotif
    except TypeError:
        return [999], 999

    direction = next_smotif[-1]
    if direction == 'left':
        next_ss_list = ss_profiles[next_smotif[0]]
    else:
        next_ss_list = ss_profiles[next_smotif[1]]
    # get and make a list of top 10(n) of the previous run
    top_hits = makeTopPickle(next_index - 1, num_hits,
                             stage)  # send the previous Smotif index

    # delete two stages down pickled files
    check_pickle = str(next_index - 2) + str("_*_*.pickle")
    file_list = glob.glob(check_pickle)

    if len(file_list) > 10:
        remove = "rm " + check_pickle
        os.system(remove)

    if top_hits:
        run_seq = []
        for i in range(len(top_hits)):
            for j in range(len(next_ss_list)):
                run_seq.append([i, j])
        return run_seq, next_index
Beispiel #6
0
def getSS2(index):
    """
    Return one candidate profile for the next Smotif, with its direction.

    Loads the first route file that exists among "contacts_route.pickle",
    "pcs_route.pickle" and "rdc_route.pickle" (checked in that order) and
    "ss_profiles.pickle", then asks getNextSmotif for the next Smotif
    definition. The definition's last item is the direction: 'left' selects
    the profile list keyed by its first item, anything else the list keyed
    by its second item.

    :param index: position of the wanted profile in the selected list
    :return: tuple (profile, direction)
    :raises IOError: if none of the route files exists
    """
    for route_file in ("contacts_route.pickle", "pcs_route.pickle",
                       "rdc_route.pickle"):
        if os.path.isfile(route_file):
            map_route = io.readPickle(route_file)
            break
    else:
        # Fail with a message that names the missing input instead of an
        # UnboundLocalError on map_route.
        raise IOError("no route file found (looked for contacts_route.pickle, "
                      "pcs_route.pickle, rdc_route.pickle)")

    ss_profiles = io.readPickle("ss_profiles.pickle")

    _, next_smotif = getNextSmotif(map_route)
    direction = next_smotif[-1]

    if direction == 'left':
        next_ss_list = ss_profiles[next_smotif[0]]
    else:
        next_ss_list = ss_profiles[next_smotif[1]]

    return next_ss_list[index], direction
Beispiel #7
0
def getSS2(index, next_smotif):
    """
    Pick one candidate profile for the given Smotif definition.

    :param index: position of the wanted profile in the selected list
    :param next_smotif: Smotif definition; its last item is the direction,
        and its first two items are keys into the table read from
        "ss_profiles.pickle"
    :return: tuple (profile, direction, next_smotif)
    """
    profile_table = io.readPickle("ss_profiles.pickle")
    direction = next_smotif[-1]

    # 'left' selects the list keyed by the first item, anything else the
    # list keyed by the second item.
    profile_key = next_smotif[0] if direction == 'left' else next_smotif[1]
    candidates = profile_table[profile_key]

    return candidates[index], direction, next_smotif
Beispiel #8
0
def makeTopPickle(previous_smotif_index, num_hits, stage):
    """
    Merge the per-thread hit pickles of one stage, rank them, drop hits
    whose sequence was already seen, and save the best ones.

    Every file matching "<previous_smotif_index>_*_*.pickle" in the current
    directory is read and its hits concatenated. A hit is iterated as a
    sequence of tagged entries (entry[0] is the tag). Hits are grouped by a
    score taken from their PCS_filter, Evofilter or RDC_filter entry (an
    RDC score is divided by 10 ** (10 * f-measure) for each NOE_filter
    entry of the same hit), visited in sorted score order, and only the
    first hit seen for each seq_filter sequence is kept. The selection is
    written to "<previous_smotif_index>_tophits.pickle".

    :param previous_smotif_index: prefix of the pickles to merge and of the
        output file name
    :param num_hits: number of hits after which selection stops; all hits
        sharing one score are taken together, so more than num_hits may be
        written
    :param stage: passed through to getNchiSum and rdcSumChi
    :return: range(count), where count is the number of hits written
    """
    hits = []
    regex = str(previous_smotif_index) + "_*_*.pickle"  # a glob pattern, despite the name
    file_list = glob.glob(regex)
    for f in file_list:
        t_hits = io.readPickle(f)
        for t_hit in t_hits:
            hits.append(t_hit)
    """
    identifiers: smotif, smotif_def, seq_filter, contacts_filter, PCS_filter, qcp_rmsd, Evofilter
                 RDC_filter, NOE_filter
    """

    # score -> list of hits with that score; the flags record which filter
    # tags were seen in ANY hit so far (they are never reset per hit).
    new_dict = collections.defaultdict(list)
    pcs_filter = False
    contact_filter = False
    rdc_filter = False
    noe_filter = False
    for hit in hits:
        # thread_data contains data from each search and filter thread.
        # A hit carrying several scoring tags is filed once per tag.
        for data_filter in hit:
            if data_filter[0] == 'PCS_filter':
                pcs_filter = True
                pcs_data = data_filter
                Nchi = getNchiSum(pcs_data, stage)
                # new_dict.setdefault(Nchi, []).append(entry)
                new_dict[Nchi].append(hit)

            if data_filter[0] == 'Evofilter':
                contact_filter = True
                new_dict[data_filter[1]].append(hit)

            if data_filter[0] == 'RDC_filter':
                rdc_filter = True
                rdc_data = data_filter
                Nchi = rdcSumChi(rdc_data, stage)
                # NOTE(review): `filter` shadows the builtin inside this function.
                for filter in hit:
                    if filter[0] == 'NOE_filter':
                        noe_filter = True
                        noe_fmeasure = filter[1]
                        # Scale the RDC score down by 10 ** (10 * f-measure).
                        Nchi = Nchi / math.pow(10, noe_fmeasure * 10)
                        new_dict[Nchi].append(hit)
                # NOTE(review): noe_filter stays True once any hit has had a
                # NOE_filter entry, so a later RDC hit without one is not
                # filed at all - confirm all hits carry the same filters.
                if not noe_filter:
                    new_dict[Nchi].append(hit)

    # ************************************************
    # Exclude the redundant entries and rank top hits
    # ************************************************

    # Python 2: keys() returns a list, sorted in place (ascending).
    keys = new_dict.keys()
    keys.sort()
    if contact_filter and not pcs_filter:
        # Contact filter data should be as high as possible
        keys.reverse()

    # Exclude the redundant data: walking the scores in ranked order, keep
    # only the first hit seen for each sequence.

    # non_redundant = {}
    non_redundant = collections.defaultdict(list)
    seqs = []
    # smotif_seq and Nchi are initialised once, outside the loops, so an
    # entry lacking the relevant tag inherits the previous entry's value.
    smotif_seq = ''
    Nchi = 0.0
    for i in range(0, len(keys)):
        entries = new_dict[keys[i]]
        for entry in entries:
            # Re-derive this entry's sequence and score from its tags; when
            # several scoring tags are present the last one read wins.
            for ent in entry:
                if ent[0] == 'smotif':
                    name = ent[1][0]  # assigned but not read again in this function
                if ent[0] == 'seq_filter':
                    seq_filter = ent
                    smotif_seq = seq_filter[1]
                if ent[0] == 'PCS_filter':
                    pcs_data = ent
                    Nchi = getNchiSum(pcs_data, stage)
                if ent[0] == 'Evofilter':
                    Nchi = ent[1]
                if ent[0] == 'RDC_filter':
                    rdc_data = ent
                    Nchi = rdcSumChi(rdc_data, stage)
                    if noe_filter:
                        # Reuses the loop name `ent`; the outer loop still
                        # resumes from its own iterator afterwards.
                        for ent in entry:
                            if ent[0] == 'NOE_filter':
                                noe_fmeasure = ent[1]
                                Nchi = Nchi / math.pow(10, noe_fmeasure * 10)
                    else:
                        # Recomputes the value already assigned above.
                        Nchi = rdcSumChi(rdc_data, stage)

            if smotif_seq not in seqs:
                seqs.append(smotif_seq)
                # non_redundant.setdefault(Nchi, []).append(entry)
                non_redundant[Nchi].append(entry)

    # Rank top hits and dump the data
    keys = non_redundant.keys()
    keys.sort()
    if contact_filter and not pcs_filter:
        keys.reverse()
    dump_pickle = []
    print "Dumping data to disk"
    count_top_hits = 0
    # Both branches after the for-loop break, so this body runs exactly once.
    while (True):
        for key in keys:
            if key == 999.999:
                # Do not work on these entries
                continue
            entries = non_redundant[key]
            for entry in entries:
                dump_pickle.append(entry)
                print "final sele", entry[0][1][0][0], key
                count_top_hits += 1
            # Checked only after a whole score group was taken, so the
            # total may exceed num_hits.
            if count_top_hits >= num_hits:
                break
        if count_top_hits >= num_hits:
            break
        else:
            print "could only extract ", count_top_hits
            break

    io.dumpPickle(str(previous_smotif_index) + "_tophits.pickle", dump_pickle)
    print "actual number in top hits ", len(dump_pickle)
    return range(count_top_hits)