def leaderboard():
    """Render the leaderboard page (``index.html``).

    The template receives:

    * ``stats`` -- one stats record per (bot, class name) pair, each with the
      owning bot document attached under ``"bot"``, sorted by descending
      ``"elo"``.
    * ``unready_bots`` -- every bot whose ``status`` is not ``"ready"``.
    * ``time_left`` -- estimated time to drain the match queue, or ``None``
      when the queue is empty or no estimate can be made.
    * ``alert`` -- an optional date-dependent reminder banner, else ``None``.
    """
    today = datetime.date.today()

    # Reminders shown on one specific day each.
    dated_alerts = {
        datetime.date(2018, 4, 24): "Remember to submit your work for peer review on LearningSpace by midnight tonight!",
        datetime.date(2018, 4, 25): "Remember to complete your assigned peer reviews on LearningSpace by midnight tomorrow!",
        datetime.date(2018, 4, 26): "Remember to complete your assigned peer reviews on LearningSpace by midnight tonight!",
    }
    alert = dated_alerts.get(today)
    if alert is None and today <= datetime.date(2018, 4, 30):
        # Fallback for every other day up to and including 30 April 2018.
        alert = "Remember to check MyFalmouth for the assessment deadline!"

    # Stats are keyed "<bot _id>+<class name>"; bots without class names
    # (missing or None) contribute nothing.
    stats = [
        statistics.get_stats(bot["_id"] + '+' + class_name)
        for bot in db.bots.find({})
        for class_name in (bot.get("class_names") or [])
    ]
    for stat in stats:
        # Everything before the last '+' is the bot's own _id.
        bot_key = stat["_id"].rpartition('+')[0]
        stat["bot"] = db.bots.find_one({"_id": bot_key})
    stats.sort(key=lambda s: s["elo"], reverse=True)

    unready_bots = list(db.bots.find({"status": {"$ne": "ready"}}))

    # NOTE(review): if db is a PyMongo database, Cursor.count() no longer
    # exists in PyMongo 4 -- use count_documents({}) when upgrading.
    matches_left = db.match_queue.find({}).count()

    time_left = None
    if matches_left > 0:
        # Each running worker is detected through its pid file.
        num_workers = len(glob.glob("run_matches*.pid"))
        # With matches queued but no worker running there is nothing to
        # divide by; report "unknown" instead of raising ZeroDivisionError.
        if num_workers > 0:
            average_duration = statistics.average(
                m["end_time"] - m["start_time"]
                for m in db.match_history.find({})
            )
            time_left = matches_left * average_duration / num_workers

    return flask.render_template(
        "index.html",
        stats=stats,
        unready_bots=unready_bots,
        time_left=time_left,
        alert=alert,
    )
def bot_info(bot_id):
    """Render the detail page for one bot.

    Looks the bot up by ``_id``. If it does not exist, ``error.html`` is
    rendered with an explanatory message. Otherwise ``bot_info.html`` is
    rendered with the bot document and one stats record per class name,
    sorted by descending ``"elo"``.
    """
    bot = db.bots.find_one({"_id": bot_id})
    if bot is None:
        return flask.render_template("error.html", message="No bot named '%s'" % bot_id)

    # A bot document may have no "class_names" key, or have it set to None
    # (the leaderboard view already allows for this); treat both as "no
    # classes" instead of failing with KeyError/TypeError.
    class_names = bot.get("class_names") or []

    # Stats are keyed "<bot _id>+<class name>".
    bot_stats = [statistics.get_stats(bot_id + '+' + class_name) for class_name in class_names]
    bot_stats.sort(key=lambda s: s["elo"], reverse=True)
    return flask.render_template("bot_info.html", bot=bot, stats=bot_stats)
# print(x)
if __name__ == '__main__':
    # Training instances: every file in the j30 folder except its parameter file.
    train_set = [
        './datasets/j30/' + name
        for name in listdir('./datasets/j30')
        if name != "param.txt"
    ]

    # Validation instances: RG300 numbers 1, 11, 21, ..., 471.
    validation_set = [
        "./datasets/RG300/datasets/RG300_" + str(number) + ".rcp"
        for number in range(1, 480, 10)
    ]

    # Test instances: whatever is listed in the RG300 folder and is not a
    # validation instance.
    # NOTE(review): validation paths contain an extra "datasets/" component
    # that these listed paths do not, so this filter may never exclude
    # anything -- confirm the directory layout.
    all_rg300 = ["./datasets/RG300/" + name for name in listdir('./datasets/RG300')]
    test_set = [path for path in all_rg300 if path not in validation_set]

    # "Hard" test instances: 20 consecutive instance numbers from each start,
    # minus any that belong to the validation set.
    hard_starts = [101, 141, 261, 301, 421, 461]
    hard_test_tmp = [
        "./datasets/RG300/datasets/RG300_" + str(number) + ".rcp"
        for start in hard_starts
        for number in range(start, start + 20)
    ]
    hard_test = [path for path in hard_test_tmp if path not in validation_set]

    # instance, series_priority_rules and types are not defined in this
    # block; they must already exist at module level.
    res = statistics.get_stats(
        instance,
        series_priority_rules,
        types,
        'parallel',
        'forward',
        use_precomputed=False,
        custom_set={'RG300': hard_test},
        verbose=True,
    )
def call_genotype(sam, chromA, chromB, posA_start, posA_end, posB_start,
                  posB_end, db):
    """Classify and genotype a structural variant (SV) from a SAM file.

    The SAM file is scanned for soft-clipped records whose two breakpoints
    fall inside the regions ``chromA:posA_start-posA_end`` and
    ``chromB:posB_start-posB_end`` (in either order), and for matched records
    that are later used to decide zygosity.  The result is then combined with
    the values returned by ``get_stats``.

    Args:
        sam: Path of the SAM file to read.
        chromA, chromB: Chromosome names of the two regions; compared as
            strings against the SAM reference name.
        posA_start, posA_end: Inclusive bounds of the first region.
        posB_start, posB_end: Inclusive bounds of the second region.
        db: Passed through unchanged to ``get_stats``.

    Returns:
        A 4-tuple, one of:

        * ``(best_breakpoint, genotype1, sv_type, statistics)`` when a
          breakpoint was found and classified (``sv_type`` is ``"tBND"``,
          ``"INV"``, ``"DEL"``, ``"DUP"`` or ``"BND"``);
        * ``([chromA, pos1, pos2], genotype2, sv_type, statistics)`` when no
          breakpoint was found but read depth indicates ``"DEL"``/``"DUP"``;
        * ``(s_arr, 'N/A', 'N/A', x)`` when the variant cannot be classified,
          where ``x`` is the statistics object or ``'N/A'``.
    """
    s_arr = np.empty((0, 12), int)  # Soft clipping alignment
    m_arr = np.empty((0, 4), int)  # matched alignment

    # Chromosome names are compared with strings parsed from the SAM file.
    # Converting once up front keeps the behaviour independent of whether a
    # split record happens to be present in the file.
    chromA = str(chromA)
    chromB = str(chromB)

    #===========================================================================================
    # Read in sam file and check for soft clips inside the specific region.
    #===========================================================================================
    with open(sam, "r") as sam_in:
        for line in sam_in:
            breakA = 0  # Breakpoint A (start) will be calculated below
            breakB = 0  # Breakpoint B (alt mapping) will be calculated below
            if line[0] == "@":  # Skip info lines
                continue
            if line[0] == '\n':  # if newline in end of file, skip this
                continue

            line = line.upper().rstrip().split("\t")
            denovo_tool = line[0]
            alt_chrA = str(line[2])
            if '.' in alt_chrA:
                # a chromosome name containing "." is treated as invalid
                continue
            contig_start = int(line[3])  # start position for contig
            map_scoreA = int(line[4])
            cigar = line[5]
            strandA = bam_flag(line[1])

            if "S" in cigar:
                # Several possible mate-mapping positions are classified as
                # bad quality and the record is ignored.
                bad_quality = False
                SA = False  # Second mapping position found?
                count_split_posA, cigar_length_posA = cigar_count(
                    cigar, strandA)
                breakA += int(contig_start)  # Breakpoint A
                breakA += count_split_posA

                # The mate position of a split contig is stored in the
                # optional field starting with "SA:".
                for field in line:
                    if field.startswith("SA:"):
                        split_info = field.split(":")
                        positions = split_info[-1]
                        # One entry per alternative position; the trailing
                        # ';' yields one extra (empty) element.
                        n_position = positions.split(";")
                        if len(n_position) > 2:
                            bad_quality = True
                            break
                        position = n_position[0].split(",")
                        alt_chrB = str(position[0])
                        mate_pos_start = position[1]
                        # NOTE(review): strandB stays unset (or keeps the
                        # previous record's value) if the strand field is
                        # neither "+" nor "-".
                        if position[2] == "+":
                            strandB = 0
                        elif position[2] == "-":
                            strandB = 1
                        map_scoreB = position[4]
                        count_split_posB, cigar_length_posB = cigar_count(
                            position[3], strandB)
                        breakB += int(mate_pos_start)
                        breakB += count_split_posB
                        SA = True
                    if field.startswith("AS:"):
                        # NOTE(review): contig_l is only assigned here; a
                        # record without an "AS:" field reuses the value of
                        # an earlier record (or fails on the first one).
                        field = field.split(":")
                        contig_l = field[-1]

                if bad_quality:
                    continue
                if not SA:
                    # the split contig has no second mapping place
                    continue

                # Total number of cigar operations of both alignments; more
                # operations indicate a less trustworthy SV.
                cigar_length = cigar_length_posA + cigar_length_posB

                # check if breakpoints fall inside desired region
                region = False
                if alt_chrA == chromA and alt_chrB == chromB:
                    if (posA_start <= breakA <= posA_end
                            and posB_start <= breakB <= posB_end):
                        region = True
                    elif (posB_start <= breakA <= posB_end
                          and posA_start <= breakB <= posA_end):
                        region = True
                elif alt_chrA == chromB and alt_chrB == chromA:
                    if (posB_start <= breakA <= posB_end
                            and posA_start <= breakB <= posA_end):
                        region = True

                if region:
                    seq = line[9]
                    # The row mixes strings and numbers, so NumPy stores
                    # every column of s_arr as a string.
                    s_arr = np.append(s_arr,
                                      np.array([[
                                          strandA, alt_chrA, breakA,
                                          map_scoreA, strandB, alt_chrB,
                                          breakB, map_scoreB, cigar_length,
                                          int(contig_l), seq, denovo_tool
                                      ]]),
                                      axis=0)

            # Matched contig
            elif "M" in cigar:
                # number of base pairs that match the reference genome
                count_match_pos, cigar_length_m = cigar_count(cigar, strandA)
                match_region_start = int(contig_start)
                match_region_end = match_region_start + count_match_pos
                # alt_chrA is a string, so m_arr is stored as strings too.
                m_arr = np.append(m_arr,
                                  np.array([[
                                      strandA, alt_chrA, match_region_start,
                                      match_region_end
                                  ]]),
                                  axis=0)

    #========================================================================================
    # Continue analysing the contigs, or if no soft clipped contigs were found; look at read
    # coverage over region
    #========================================================================================

    # if no breakpoints could be found.
    if len(s_arr) == 0:
        print('No SV could be found using de novo assembly. Checking if read coverage information could be used to classify DEL, DUP and genotype')
        sv_type = ""
        genotype2 = ""
        if chromA != chromB:
            # returns N/A in order to continue the caller's loop
            return s_arr, 'N/A', 'N/A', 'N/A'

        # NOTE(review): the reason for the fixed 1000 offset is not visible
        # in this function -- presumably it undoes padding added by the
        # caller; confirm.
        pos1 = posA_start + 1000
        pos2 = posB_start + 1000
        statistics = get_stats(db, chromA, pos1, pos2, 'same')
        # If there is no information about read coverage, skip this variant
        if len(statistics) == 0:
            return s_arr, 'N/A', 'N/A', statistics

        # DELETION
        if statistics['RD_norm_1'] < (0.25 * statistics['RD_all']):
            sv_type = "DEL"
            genotype2 = "1/1"
        if statistics['RD_norm_1'] >= (0.25 * statistics['RD_all']):
            if statistics['RD_norm_1'] <= (0.75 * statistics['RD_all']):
                sv_type = "DEL"
                genotype2 = "0/1"
        # DUPLICATION
        if statistics['RD_norm_1'] >= (1.25 * statistics['RD_all']):
            if statistics['RD_norm_1'] < (1.75 * statistics['RD_all']):
                sv_type = "DUP"
                genotype2 = "0/1"
            if statistics['RD_norm_1'] >= (1.75 * statistics['RD_all']):
                sv_type = "DUP"
                genotype2 = "1/1"

        if sv_type != "" and genotype2 != "":
            sv_info = [chromA, pos1, pos2]
            # add genotype2 to stat dictionary
            statistics['genotype2'] = genotype2
            return sv_info, genotype2, sv_type, statistics
        return s_arr, 'N/A', 'N/A', 'N/A'

    # One or more breakpoints were found.  With several candidates, use the
    # row with the largest value in column 9 (the "AS:" field value); with a
    # single row argmax simply selects it.
    contig_lengths = s_arr[:, 9].astype(int)
    best_breakpoint = s_arr[np.argmax(contig_lengths)]

    # If a matched contig on the same chromosome spans the predicted
    # breakpoint, the genotype is heterozygous, otherwise homozygous.
    # s_arr and m_arr hold strings, so positions are converted to int before
    # comparing; comparing the raw values would be lexicographic
    # (e.g. "9800" > "10500").
    breakpoint_pos = int(best_breakpoint[2])
    genotype1 = "1/1"
    for row in m_arr:
        if (row[1] == best_breakpoint[1]
                and int(row[2]) < breakpoint_pos < int(row[3])):
            genotype1 = "0/1"
            break

    if best_breakpoint[1] != best_breakpoint[5]:
        # breakpoints are located on different chromosomes -> break end
        sv_type = "tBND"
        genotype2 = "NA"
        statistics = get_stats(db, best_breakpoint[1], best_breakpoint[2],
                               best_breakpoint[6], best_breakpoint[5])
        # If there is no information about read coverage, skip this variant
        if len(statistics) == 0:
            return s_arr, 'N/A', 'N/A', statistics
        # Single pre-formatted string: prints the same text whether print is
        # a statement or a function.
        print('%s %s %s %s %s' % (best_breakpoint, genotype1, sv_type,
                                  statistics, 'tBND'))
        statistics['genotype2'] = genotype2
        return best_breakpoint, genotype1, sv_type, statistics

    # From here on both breakpoints are on the same chromosome.
    # Get statistics from SVGenT.db and read_cov.db
    statistics = get_stats(db, best_breakpoint[1], best_breakpoint[2],
                           best_breakpoint[6], 'same')
    # If there is no information about read coverage, skip this variant
    if len(statistics) == 0:
        return s_arr, 'N/A', 'N/A', statistics

    # Mappability threshold: SVs with a low mappability score are dropped.
    if statistics['map_1'] < 0.25:
        return s_arr, 'N/A', 'N/A', statistics

    print('located on same chromosome')
    if best_breakpoint[0] != best_breakpoint[4]:
        # sequences are in opposite directions -> inversion
        sv_type = "INV"
        genotype2 = "none"
    else:
        # Copy-number classification from read depth relative to RD_all.
        if statistics['RD_norm_1'] < (0.25 * statistics['RD_all']):
            sv_type = "DEL"
            genotype2 = "1/1"
        elif (statistics['RD_norm_1'] >= (0.25 * statistics['RD_all'])
              and statistics['RD_norm_1'] <= (0.75 * statistics['RD_all'])):
            sv_type = "DEL"
            genotype2 = "0/1"
        elif statistics['RD_norm_1'] >= (1.25 * statistics['RD_all']):
            sv_type = "DUP"
            if statistics['RD_norm_1'] < (1.75 * statistics['RD_all']):
                genotype2 = "0/1"
            else:
                genotype2 = "1/1"
        else:
            # Read depth neither reduced nor increased: plain break end.
            sv_type = "BND"
            genotype2 = "none"

    # add genotype2 to stat dictionary
    statistics['genotype2'] = genotype2
    return best_breakpoint, genotype1, sv_type, statistics
def show_stats():
    """Render ``stats.html`` with the four values returned by ``get_stats()``."""
    # get_stats() comes from statistics.py and returns these four values,
    # in this order.
    stats, date_list, daily_requests, sound_stats = get_stats()
    return render_template(
        'stats.html',
        stats=stats,
        date_list=date_list,
        daily_requests=daily_requests,
        sound_stats=sound_stats,
    )