Ejemplo n.º 1
0
def leaderboard():
    """Render the leaderboard page.

    Gathers one statistics row per (bot, class name) pair, sorts the rows by
    their "elo" value in descending order, lists the bots whose status is not
    "ready", and estimates how long the remaining queued matches will take.

    Returns:
        The rendered "index.html" template with ``stats``, ``unready_bots``,
        ``time_left`` (``None`` when no estimate can be made) and ``alert``.
    """
    # Date-specific reminder banner; stays None once the last date has passed.
    alert = None
    today = datetime.date.today()
    if today == datetime.date(2018, 4, 24):
        alert = "Remember to submit your work for peer review on LearningSpace by midnight tonight!"
    elif today == datetime.date(2018, 4, 25):
        alert = "Remember to complete your assigned peer reviews on LearningSpace by midnight tomorrow!"
    elif today == datetime.date(2018, 4, 26):
        alert = "Remember to complete your assigned peer reviews on LearningSpace by midnight tonight!"
    elif today <= datetime.date(2018, 4, 30):
        alert = "Remember to check MyFalmouth for the assessment deadline!"

    # Attach the bot document we are already iterating over instead of parsing
    # the stats id and re-querying the database once per row.
    # NOTE(review): assumes get_stats() returns a row for the id it was given,
    # which the previous id-splitting code also relied on.
    stats = []
    for bot in db.bots.find({}):
        for class_name in (bot.get("class_names") or []):
            stat = statistics.get_stats(bot["_id"] + '+' + class_name)
            stat["bot"] = bot
            stats.append(stat)

    stats.sort(key=lambda s: s["elo"], reverse=True)

    unready_bots = list(db.bots.find({"status": {"$ne": "ready"}}))

    # NOTE(review): Cursor.count() is deprecated since PyMongo 3.7 and removed
    # in 4.0; use count_documents({}) once the minimum PyMongo version allows.
    matches_left = db.match_queue.find({}).count()
    time_left = None
    if matches_left > 0:
        # One pid file per running match worker.
        num_workers = len(glob.glob("run_matches*.pid"))
        # Without any worker the queue is not being drained, so there is no
        # meaningful estimate (and dividing by zero would crash the page).
        if num_workers > 0:
            average_duration = statistics.average(
                m["end_time"] - m["start_time"]
                for m in db.match_history.find({}))
            time_left = matches_left * average_duration / num_workers

    return flask.render_template("index.html", stats=stats, unready_bots=unready_bots, time_left=time_left, alert=alert)
Ejemplo n.º 2
0
def bot_info(bot_id):
    """Render the detail page for a single bot.

    Args:
        bot_id: The ``_id`` of the bot document to show.

    Returns:
        The rendered "bot_info.html" template with the bot and its per-class
        statistics sorted by "elo" (highest first), or the rendered
        "error.html" template when no bot with that id exists.
    """
    bot = db.bots.find_one({"_id": bot_id})

    if bot is None:
        return flask.render_template("error.html", message="No bot named '%s'" % bot_id)

    # A bot document may have no class names (missing key or None); treat
    # that as "no statistics" rather than failing on the lookup.
    class_names = bot.get("class_names") or []
    bot_stats = [statistics.get_stats(bot_id + '+' + class_name) for class_name in class_names]
    bot_stats.sort(key=lambda s: s["elo"], reverse=True)
    return flask.render_template("bot_info.html", bot=bot, stats=bot_stats)
Ejemplo n.º 3
0
# print(x)
if __name__ == '__main__':
    # Training instances: every file in the j30 folder except the parameter file.
    train_set = [
        './datasets/j30/' + file_name
        for file_name in listdir('./datasets/j30')
        if file_name != "param.txt"
    ]

    # Validation instances: every tenth RG300 instance, numbered 1, 11, ..., 471.
    validation_set = [
        "./datasets/RG300/datasets/RG300_" + str(number) + ".rcp"
        for number in range(1, 480, 10)
    ]

    # NOTE(review): these paths are prefixed with "./datasets/RG300/" while the
    # validation paths use "./datasets/RG300/datasets/" - confirm the filter
    # below can actually match anything with the real directory layout.
    all_rg300 = [
        "./datasets/RG300/" + file_name
        for file_name in listdir('./datasets/RG300')
    ]
    test_set = [path for path in all_rg300 if path not in validation_set]

    # Hard instances: six runs of 20 consecutive instance numbers each.
    hard_starts = [101, 141, 261, 301, 421, 461]
    hard_test_tmp = [
        "./datasets/RG300/datasets/RG300_" + str(number) + ".rcp"
        for first in hard_starts
        for number in range(first, first + 20)
    ]

    # Keep the hard test set disjoint from the validation set.
    hard_test = [path for path in hard_test_tmp if path not in validation_set]

    res = statistics.get_stats(
        instance,
        series_priority_rules,
        types,
        'parallel',
        'forward',
        use_precomputed=False,
        custom_set={'RG300': hard_test},
        verbose=True,
    )
Ejemplo n.º 4
0
def call_genotype(sam, chromA, chromB, posA_start, posA_end, posB_start,
                  posB_end, db):
    """Classify a structural variant (SV) and its genotype from contig alignments.

    The SAM file is scanned for soft-clipped contigs that carry exactly one
    supplementary mapping (``SA`` tag) and whose two breakpoints fall inside
    the requested regions A and B. Fully matched contigs are collected as
    well; they are used afterwards to decide whether the reference allele is
    still present at the chosen breakpoint.

    If no breakpoint is found, read-coverage statistics from ``get_stats``
    are used to call a deletion or duplication (same-chromosome regions only).

    Args:
        sam: Path of the SAM file to read.
        chromA: Chromosome of region A (compared as ``str``).
        chromB: Chromosome of region B (compared as ``str``).
        posA_start: Lower bound (inclusive) of region A.
        posA_end: Upper bound (inclusive) of region A.
        posB_start: Lower bound (inclusive) of region B.
        posB_end: Upper bound (inclusive) of region B.
        db: Passed through unchanged to ``get_stats``.

    Returns:
        A 4-tuple ``(sv, genotype, sv_type, statistics)``:

        * breakpoint found: ``sv`` is the chosen row of the breakpoint array,
          ``genotype`` is "0/1" or "1/1", ``sv_type`` is one of "tBND",
          "INV", "DEL", "DUP" or "BND", and ``statistics`` is the
          ``get_stats`` result with a "genotype2" entry added;
        * no breakpoint, but coverage supports a call: ``sv`` is
          ``[chromA, pos1, pos2]`` and ``genotype`` is the coverage genotype;
        * nothing could be called: ``(s_arr, 'N/A', 'N/A', x)`` where ``x``
          is either the ``get_stats`` result or ``'N/A'``.
    """
    s_arr = np.empty((0, 12), int)  # Soft clipping alignment
    m_arr = np.empty((0, 4), int)  # matched alignment
    with open(sam, "r") as sam_in:

        #===========================================================================================
        #  Read in sam file and check for soft clips inside the specific region.
        #===========================================================================================
        for line in sam_in:

            breakA = 0  # Breakpoint A (start) will be calculated below
            breakB = 0  # Breakpoint B (alt mapping) will be calculated below
            if line[0] == "@":  # Skip info lines
                continue
            if line[0] == '\n':  # if newline in end of file, skip this
                continue

            line = line.upper().rstrip().split("\t")
            denovo_tool = line[0]
            alt_chrA = str(line[2])
            if '.' in alt_chrA:  # a chromosome name containing "." is invalid; go on with the next record
                continue
            contig_start = int(line[3])  # start position for contig
            map_scoreA = int(line[4])
            cigar = line[5]
            strandA = bam_flag(line[1])

            if "S" in cigar:
                bad_quality = False  # set when the mate mapping is ambiguous or malformed; such records are ignored
                SA = False  # Second mapping position
                # Default used when the record has no AS tag, so that a value
                # left over from a previous record is never reused.
                contig_l = 0
                count_split_posA, cigar_length_posA = cigar_count(
                    cigar, strandA)
                breakA += int(contig_start)  # Breakpoint A
                breakA += count_split_posA

                # Look at the mate position of split reads, stored in the
                # optional field starting with SA:Z. Only the optional columns
                # (after the 11 mandatory SAM columns) are scanned so that
                # QNAME/QUAL text cannot be mistaken for a tag.
                for field in line[11:]:
                    if field.startswith("SA:"):
                        split_info = field.split(":")
                        positions = split_info[-1]
                        # split into number of positions. If more than one alternative position, skip!
                        n_position = positions.split(";")
                        if len(n_position) > 2:  # due to one extra object; new line
                            bad_quality = True
                            break
                        position = n_position[0].split(",")
                        alt_chrB = str(position[0])
                        mate_pos_start = position[1]

                        # strand
                        if position[2] == "+":
                            strandB = 0
                        elif position[2] == "-":
                            strandB = 1
                        else:
                            # Malformed strand: skip the record instead of
                            # using an undefined or stale strandB.
                            bad_quality = True
                            break
                        map_scoreB = position[4]

                        count_split_posB, cigar_length_posB = cigar_count(
                            position[3], strandB)

                        breakB += int(mate_pos_start)
                        breakB += count_split_posB
                        SA = True

                    elif field.startswith("AS:"):
                        contig_l = field.split(":")[-1]

                if bad_quality:
                    continue
                if not SA:  # If the split contig have no second mapping place, continue
                    continue
                # count number of cigars, more cigars indicates untrustworthy SV.
                cigar_length = cigar_length_posA + cigar_length_posB

                # check if breakpoints fall inside desired region
                chromA = str(chromA)
                chromB = str(chromB)
                a_in_region_a = posA_start <= breakA <= posA_end
                a_in_region_b = posB_start <= breakA <= posB_end
                b_in_region_a = posA_start <= breakB <= posA_end
                b_in_region_b = posB_start <= breakB <= posB_end
                region = False
                if alt_chrA == chromA and alt_chrB == chromB:
                    region = ((a_in_region_a and b_in_region_b)
                              or (a_in_region_b and b_in_region_a))
                elif alt_chrA == chromB and alt_chrB == chromA:
                    region = a_in_region_b and b_in_region_a

                if region:  # If region is True, save to array
                    seq = line[9]
                    # The row mixes str and int values, so numpy stores every
                    # element of s_arr as a string.
                    s_arr = np.append(s_arr,
                                      np.array([[
                                          strandA, alt_chrA, breakA,
                                          map_scoreA, strandB, alt_chrB,
                                          breakB, map_scoreB, cigar_length,
                                          int(contig_l), seq, denovo_tool
                                      ]]),
                                      axis=0)

            # Matched contig
            elif "M" in cigar:
                # count the number of base pairs that match to reference genome
                count_match_pos, cigar_length_m = cigar_count(cigar, strandA)
                match_region_start = int(contig_start)
                match_region_end = match_region_start + count_match_pos
                m_arr = np.append(m_arr,
                                  np.array([[
                                      strandA, alt_chrA,
                                      match_region_start, match_region_end
                                  ]]),
                                  axis=0)

    #========================================================================================
    #  Continue analysing the contigs, or if no soft clipped contigs were found; look at read
    #  coverage over region
    #========================================================================================

    # if no breakpoints could be found.
    if len(s_arr) == 0:
        print('No SV could be found using de novo assembly. Checking if read coverage information could be used to classify DEL, DUP and genotype')
        sv_type = ""
        genotype2 = ""
        if chromA == chromB:
            pos1 = posA_start + 1000
            pos2 = posB_start + 1000

            statistics = get_stats(db, chromA, pos1, pos2, 'same')
            # If there is no information about read coverage, skip this variant
            if len(statistics) == 0:
                return s_arr, 'N/A', 'N/A', statistics

            read_depth = statistics['RD_norm_1']
            read_depth_all = statistics['RD_all']

            # DELETION
            if read_depth < (0.25 * read_depth_all):
                sv_type = "DEL"
                genotype2 = "1/1"
            if read_depth >= (0.25 * read_depth_all):
                if read_depth <= (0.75 * read_depth_all):
                    sv_type = "DEL"
                    genotype2 = "0/1"
            # DUPLICATION
            if read_depth >= (1.25 * read_depth_all):
                if read_depth < (1.75 * read_depth_all):
                    sv_type = "DUP"
                    genotype2 = "0/1"
                if read_depth >= (1.75 * read_depth_all):
                    sv_type = "DUP"
                    genotype2 = "1/1"

            if sv_type != "" and genotype2 != "":
                sv_info = [chromA, pos1, pos2]
                # add genotype2 to stat dictionary
                statistics['genotype2'] = genotype2

                return sv_info, genotype2, sv_type, statistics

            return s_arr, 'N/A', 'N/A', 'N/A'

        return s_arr, 'N/A', 'N/A', 'N/A'  # returns N/A in order to continue the loop

    # One or more breakpoints were found. If there are several predicted SVs
    # in s_arr, use the one with the largest value in column 9 (the AS tag).
    if len(s_arr) > 1:
        # builtin int instead of the np.int alias, which NumPy 1.24 removed
        seq_col = s_arr[:, 9].astype(int)
        best_breakpoint = s_arr[np.argmax(seq_col)]
    else:
        best_breakpoint = s_arr[0]

    # Check if there is a matching contig to ref that spans over the predicted
    # breakpoint. If there is, the genotype is classified as heterozygous;
    # otherwise it is classified as homozygous.
    # Positions are stored as strings inside the arrays, so convert them to
    # int to compare numerically rather than lexicographically.
    breakpoint_pos = int(best_breakpoint[2])
    genotype1 = "1/1"
    for row in m_arr:
        if (row[1] == best_breakpoint[1]
                and int(row[2]) < breakpoint_pos < int(row[3])):
            genotype1 = "0/1"
            break

    if best_breakpoint[1] != best_breakpoint[5]:
        # breakpoints are located on different chromosomes -> break end
        sv_type = "tBND"
        genotype2 = "NA"
        statistics = get_stats(db, best_breakpoint[1], best_breakpoint[2],
                               best_breakpoint[6], best_breakpoint[5])
        # If there is no information about read coverage, skip this variant
        if len(statistics) == 0:
            return s_arr, 'N/A', 'N/A', statistics
        # %-formatting keeps the space-separated output of the former print
        # statement while also parsing under Python 3.
        print('%s %s %s %s %s' % (best_breakpoint, genotype1, sv_type,
                                  statistics, 'tBND'))
        statistics['genotype2'] = genotype2
        return best_breakpoint, genotype1, sv_type, statistics

    # Same chromosome: get statistics from SVGenT.db and read_cov.db
    statistics = get_stats(db, best_breakpoint[1], best_breakpoint[2],
                           best_breakpoint[6], 'same')

    # If there is no information about read coverage, skip this variant
    if len(statistics) == 0:
        return s_arr, 'N/A', 'N/A', statistics

    # mappability threshold, we do not want to keep SVs that have a low
    # mappability score = no support for SV.
    if statistics['map_1'] < 0.25:
        return s_arr, 'N/A', 'N/A', statistics

    # classify SV.
    print('located on same chromosome')
    read_depth = statistics['RD_norm_1']
    read_depth_all = statistics['RD_all']
    if best_breakpoint[0] != best_breakpoint[4]:
        # INV: sequences are in opposite directions -> inversed
        sv_type = "INV"
        genotype2 = "none"
    # CNV
    elif read_depth < (0.25 * read_depth_all):
        # DELETION, homozygous
        sv_type = "DEL"
        genotype2 = "1/1"
    elif read_depth <= (0.75 * read_depth_all):
        # DELETION, heterozygous (depth between 0.25 and 0.75 of the overall depth)
        sv_type = "DEL"
        genotype2 = "0/1"
    elif read_depth >= (1.25 * read_depth_all):
        # DUPLICATION
        sv_type = "DUP"
        if read_depth < (1.75 * read_depth_all):
            genotype2 = "0/1"
        else:
            genotype2 = "1/1"
    else:
        sv_type = "BND"
        genotype2 = "none"

    # add genotype2 to stat dictionary
    statistics['genotype2'] = genotype2

    return best_breakpoint, genotype1, sv_type, statistics
Ejemplo n.º 5
0
def show_stats():
    """Render the statistics page from the values returned by get_stats()."""
    # get_stats() comes from statistics.py and returns four values.
    stats, date_list, daily_requests, sound_stats = get_stats()
    # Pass each value under its own name, spelled out explicitly.
    return render_template(
        'stats.html',
        stats=stats,
        date_list=date_list,
        daily_requests=daily_requests,
        sound_stats=sound_stats,
    )