def positivity(address):
    """Write the mean canopy positivity score of every member to a file.

    Each non-blank line of the file at ``address`` is read as one canopy: a
    whitespace-separated list of member tags. A canopy's positivity score is
    the fraction of its members for which ``membertag_to_class(member) == 1``.
    Every occurrence of a member in a canopy contributes that canopy's score
    to the member; the member's final score is the mean of its contributions.

    One ``"<member> <score>"`` line per member is written to the path held in
    the module-level name ``positivity_outputs``, in the order in which
    ``BTree.items()`` yields the members.

    Args:
        address: Path of the canopy output file to read.
    """
    # ---- read canopy output ----
    positivity_scores = BTree()
    with open(address, 'r') as f:
        for line in f:
            canopy_members = line.strip().split()
            if not canopy_members:
                # A blank line holds no members; without this guard the
                # ratio below divides by zero.
                continue
            number_of_positive = sum(
                1 for member in canopy_members
                if membertag_to_class(member) == 1
            )
            p_score = float(number_of_positive) / len(canopy_members)
            for member in canopy_members:
                # NOTE(review): assumes BTree raises KeyError for a missing
                # key, as the standard mapping protocol does -- confirm.
                try:
                    score_list = positivity_scores[member]
                except KeyError:
                    positivity_scores[member] = [p_score]
                else:
                    score_list.append(p_score)
                    # Store the list back, as the original code did, in case
                    # the container does not hold values by reference.
                    positivity_scores[member] = score_list

    # ---- take average score and prepare output ----
    # The averages are computed on the fly instead of being written back into
    # the tree while it is being iterated.
    output_lines = [
        member + ' ' + str(sum(score_list) / float(len(score_list))) + '\n'
        for member, score_list in positivity_scores.items()
    ]

    # NOTE(review): train_set_positivity() reads the hard-coded file name
    # "positivity_outputs.txt"; confirm positivity_outputs points to it.
    with open(positivity_outputs, 'w') as f:
        f.write(''.join(output_lines))
def train_set_positivity():
    """Split ``positivity_outputs.txt`` into one score file per sample.

    Each non-blank input line is expected to have the form
    ``"<sample>_<seq_number> <score>"``: the first whitespace-separated field
    is split on ``"_"`` to obtain the sample name and the sequence number, and
    the second field is the score (kept as text, never parsed).

    For every sample, a file is written at
    ``os.path.join(train_set_positivity_scores, sample)`` (where
    ``train_set_positivity_scores`` is a module-level name) containing one
    ``"<seq_number> <score>"`` line per input line of that sample, in input
    order.

    Raises:
        IndexError: If a non-blank line has no ``"_"`` in its first field or
            has no second field.
    """
    data = BTree()
    with open("positivity_outputs.txt", "r") as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Skip blank lines instead of failing on fields[0].
                continue
            tag_parts = fields[0].split("_")
            sample = tag_parts[0]
            seq_number = tag_parts[1]
            score = fields[1]
            # NOTE(review): assumes BTree raises KeyError for a missing key,
            # as the standard mapping protocol does -- confirm.
            try:
                entries = data[sample]
            except KeyError:
                data[sample] = [(seq_number, score)]
            else:
                entries.append((seq_number, score))
                # Store the list back, as the original code did, in case the
                # container does not hold values by reference.
                data[sample] = entries

    for sample, entries in data.items():
        with open(os.path.join(train_set_positivity_scores, sample), "w") as f:
            f.writelines(
                seq_number + " " + score + "\n"
                for seq_number, score in entries
            )