def get_top_series(grade_level, hard_seq, score_format):
    if grade_level.lower() == "freshman_10":
        top_set = utils.list_from_file(
            "/Users/thomasolson/Documents/workspace/advising_revamp/series analysis/top1_subset_all_possible_series_10_old_cir_only_score_freshman_bonus_412add_211add.csv",
            "\n", ",", False)
    elif grade_level.lower() == "freshman_8":
        top_set = utils.list_from_file(
            "/Users/thomasolson/Documents/workspace/advising_revamp/series analysis/7_25_top_1_impact_summary_seq.csv",
            "\n", ",", False)
    elif grade_level == "sfsu_seq_check":
        top_set = utils.list_from_file(
            "/Users/thomasolson/Documents/workspace/advising_revamp/series analysis/SFSU_Recommended_Seq.csv",
            "\n", ",", False)
    else:
        top_set = utils.list_from_file(
            "/Users/thomasolson/Documents/workspace/advising_revamp/series analysis/all_possible_series_xfer_6_scored_412add.csv",
            "\n", ",", False)
    parsed_series = []
    for series in top_set:
        temp_list = []
        y = 0
        if score_format:
            y = 1  # scored files carry a leading score column; skip it
        for x in range(y, len(series)):
            ser = series[x]
            if score_format:
                ser = ser.strip("[]' ")
            crs = ser.split("_")[1]
            count = ser.split("_")[0]
            if hard_seq:
                # counter_list is assumed to be a module-level lookup defined elsewhere in this file
                temp_list.append(counter_list[int(count)] + "_" + crs)
                #temp_list.append(count+"_"+crs)
            else:
                temp_list.append(crs)
        parsed_series.append(temp_list)
    return parsed_series
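# Illustration (hypothetical token, not from the original data) of how a scored
# series entry is parsed above: list punctuation is stripped, then the
# "<semester>_<course>" token is split on "_".
_ser = "['1_MATH226".strip("[]' ")
assert (_ser.split("_")[0], _ser.split("_")[1]) == ("1", "MATH226")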
def main_generate_student_data(name_path, core_path, elective_path):
    cnx = utils.get_connection("advisor", "passadvise", "localhost", "ADVISING")
    name_data = utils.list_from_file(name_path, "\n", ",", False)
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    students = generate_student(name_data, elective_data[:], core_data[:])
    persist_students(students, cnx)
    cnx.close()
def check_class_load_effect(core_path, elective_path, testers):
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    exceptional_classes = []
    for course in core_data:
        if course[3] != "1":  # NOTE: verify column 3 is the correct flag for exceptional classes
            exceptional_classes.append(course[0])
    for course in elective_data:
        if course[3] != "1":
            exceptional_classes.append(course[0])
    grade_sets_no_penalty = []
    grade_sets_penalty = []
    student_histories = gather_student_history(testers)
    for hist in student_histories:
        semesters = semester_dissolve(hist)
        for semester in semesters:
            course_count = len(semesters[semester])
            semester_courses = semesters[semester]
            penalty_class = False
            grade_set = []
            for course in semester_courses:
                if course[0] in exceptional_classes:
                    penalty_class = True
                grade_set.append(float(course[1]))
            if (penalty_class and course_count >= 5) or course_count > 5:
                #grade_sets_penalty.append(float(course[1]))
                grade_sets_penalty.append(numpy.mean(grade_set))
            else:
                #grade_sets_no_penalty.append(float(course[1]))
                grade_sets_no_penalty.append(numpy.mean(grade_set))
    prefix = "student"
    if testers:
        prefix = "cohort"
    print(prefix + " penalty course")
    print("mean   : %.4f" % numpy.mean(grade_sets_penalty))
    print("median : %.4f" % numpy.median(grade_sets_penalty))
    print("std    : %.4f" % numpy.std(grade_sets_penalty))
    print(prefix + " normal course")
    print("mean   : %.4f" % numpy.mean(grade_sets_no_penalty))
    print("median : %.4f" % numpy.median(grade_sets_no_penalty))
    print("std    : %.4f" % numpy.std(grade_sets_no_penalty))
    trace0 = go.Box(y=grade_sets_no_penalty, name=prefix + ' normal course grades',
                    marker=dict(color='rgb(214, 12, 140)'))
    trace1 = go.Box(y=grade_sets_penalty, name=prefix + ' penalty course grades',
                    marker=dict(color='rgb(0, 128, 128)'))
    data = [trace0, trace1]
    py.plot(data, filename="feb4test_course_load_penalty")
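# Minimal sketch restating the penalty rule used above: a semester is penalized
# when it contains an exceptional class at five or more courses, or has more
# than five courses regardless.
def _is_penalty_semester(course_count, has_exceptional_class):
    return (has_exceptional_class and course_count >= 5) or course_count > 5

assert _is_penalty_semester(5, True) and not _is_penalty_semester(5, False)
assert _is_penalty_semester(6, False)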
def check_bonus_effect(core_path, elective_path, testers):
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    bonus_class_ref = {}
    for course in core_data:
        if course[5] != "":
            bonus_class_ref[course[0]] = course[5]
    for course in elective_data:
        if course[5] != "":
            bonus_class_ref[course[0]] = course[5]
    grade_sets_no_bonus = []
    grade_sets_bonus = []
    student_histories = gather_student_history(testers)
    for hist in student_histories:
        taken_classes = []
        for datas in hist:
            if datas[0] in bonus_class_ref:
                bonus_classes = bonus_class_ref[datas[0]].split(";")
                found = False
                for b_class in bonus_classes:
                    if b_class in taken_classes:
                        found = True
                if found:
                    grade_sets_bonus.append(float(datas[1]))
                else:
                    grade_sets_no_bonus.append(float(datas[1]))
            taken_classes.append(datas[0])
    prefix = "student"
    if testers:
        prefix = "cohort"
    print(prefix + " bonus course")
    print("mean   : %.4f" % numpy.mean(grade_sets_bonus))
    print("median : %.4f" % numpy.median(grade_sets_bonus))
    print("std    : %.4f" % numpy.std(grade_sets_bonus))
    print(prefix + " normal course")
    print("mean   : %.4f" % numpy.mean(grade_sets_no_bonus))
    print("median : %.4f" % numpy.median(grade_sets_no_bonus))
    print("std    : %.4f" % numpy.std(grade_sets_no_bonus))
    trace0 = go.Box(y=grade_sets_bonus, name=prefix + ' grade_sets_bonus',
                    marker=dict(color='rgb(214, 12, 140)'))
    trace1 = go.Box(y=grade_sets_no_bonus, name=prefix + ' grade_sets_no_bonus',
                    marker=dict(color='rgb(0, 128, 128)'))
    data = [trace0, trace1]
    py.plot(data, filename="feb4tests_grade_penalty")
def run_student_vectors(core_path, elective_path, request_type, vect_type, base_dir, sim_path):
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    class_dict = build_class_key_vector(core_data, elective_data, request_type)
    student_list = utils.get_students_history()
    student_vects = {}
    for student in student_list:
        vect = build_student_vector(student, class_dict, request_type, vect_type)
        student_vects[student] = vect
    sim_path = sim_path + request_type + "_" + vect_type + ".csv"
    cluster(student_vects, request_type, vect_type, base_dir, sim_path)
def read_classes(**kwargs):
    course_list = []
    if 'core_path' in kwargs:
        core_data = utils.list_from_file(kwargs['core_path'], "\n", ",", False)
        for course in core_data:
            course_list.append(course[0].strip())
    if 'elective_path' in kwargs:
        elective_data = utils.list_from_file(kwargs['elective_path'], "\n", ",", False)
        for course in elective_data:
            course_list.append(course[0].strip())
    return course_list
def label_student_prepardness(students):
    """Flags students as underprepared in math and/or physics based on the
    unprepared-course file list. Run as part of init."""
    unprepared_list = utils.list_from_file("./unprepared_courses", "\n", ",", False)
    change_list = []
    for student in students:
        course_history = student.course_history
        label = set()
        for crs in course_history:
            if crs.name in unprepared_list:
                if "MATH" in crs.name:
                    label.add("UN_MATH")
                if "PHYS" in crs.name:
                    label.add("UN_PHYS")
        if len(label) == 0:
            continue
        if len(label) == 1:
            student.prep_assess = label.pop()
        else:
            student.prep_assess = label.pop() + ";" + label.pop()
        student.prep_assess_summary = "1"
        change_list.append(student)
    return change_list
def load_annotations(self, ann_file):
    data_infos = []
    img_ids = list_from_file(ann_file)
    for img_id in img_ids:
        filename = f'JPEGImages/{img_id}.jpg'
        xml_path = osp.join(self.img_prefix, 'Annotations', f'{img_id}.xml')
        tree = ET.parse(xml_path)
        root = tree.getroot()
        size = root.find('size')
        width = 0
        height = 0
        if size is not None:
            width = int(size.find('width').text)
            height = int(size.find('height').text)
        else:
            # Fall back to reading the image when the XML lacks a size element.
            img_path = osp.join(self.img_prefix, 'JPEGImages', f'{img_id}.jpg')
            img = Image.open(img_path)
            width, height = img.size
        ann = self.get_ann_info(root)
        # Convert [x1, y1, x2, y2] boxes to [x, y, w, h].
        ann['bboxes'][:, 2] = ann['bboxes'][:, 2] - ann['bboxes'][:, 0]
        ann['bboxes'][:, 3] = ann['bboxes'][:, 3] - ann['bboxes'][:, 1]
        data_infos.append(
            dict(id=img_id, filename=filename, width=width, height=height, ann=ann))
    return data_infos
def import_students_dict():
    students_raw = utils.list_from_file(
        "/Users/thomasolson/Documents/workspace/advising_revamp/sfsu_data_v2.csv",
        "\n", ",", True)
    student_dict = it.build_student_dict(
        students_raw, student_id=0, sex=4, ethnic=5, age=6, resident_status=7,
        standing=8, entry_major=13, final_major=33, spring_19_major=37,
        admin_descript=9, crs_abbr=15, crs_num=16, grade_str=21, year_int=3,
        term_gpa=22, sfsu_gpa=23, term_units=25, sfsu_units=26, grad_flag=31,
        spring_19_flag=35, crs_college_long=19, crs_dept_long=20, total_units=27)
    return student_dict
def get_class_prereqs():
    prereq_cat = {}
    crs_list = utils.list_from_file("./cs_prereqs", "\n", ",", False)
    for crs in crs_list:
        name = crs[0]
        # Prereq lists are delimited by "^" or ";" depending on the source row.
        if "^" in crs[1]:
            preq = crs[1].split("^")
        else:
            preq = crs[1].split(";")
        prereq_cat[name] = preq
    return prereq_cat
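# Self-contained illustration of the delimiter handling above, with made-up
# rows (the real format comes from ./cs_prereqs):
_rows = [("CSC340", "CSC220^CSC256"), ("CSC413", "CSC317;CSC318")]
_parsed = {name: (req.split("^") if "^" in req else req.split(";"))
           for name, req in _rows}
assert _parsed == {"CSC340": ["CSC220", "CSC256"], "CSC413": ["CSC317", "CSC318"]}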
def precompute_sim(core_path, elective_path, request_type, vect_type, outpath):
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    class_dict = build_class_key_vector(core_data, elective_data, request_type)
    student_list = utils.get_students_history()
    student_vects = {}
    for student in student_list:
        vect = build_student_vector(student, class_dict, request_type, vect_type)
        student_vects[student] = vect
    output = []
    for x in range(0, len(student_list)):
        student_a_vect = utils.grade_vect_to_bit(student_vects[student_list[x]])
        print(x)
        for y in range(x + 1, len(student_list)):
            student_b_vect = utils.grade_vect_to_bit(student_vects[student_list[y]])
            # Pairwise distance = 1 - similarity between the two bit vectors.
            tani = 1.0 - jaccard_similarity_score(student_a_vect, student_b_vect)
            output.append(str(student_list[x].id_num) + "," +
                          str(student_list[y].id_num) + "," + str(tani))
    utils.list_to_file(outpath, output)
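# Minimal sketch of a Jaccard (Tanimoto) distance on bit vectors, in plain
# Python for clarity. Note this is an assumption about the intent: sklearn's
# old jaccard_similarity_score (since removed in favor of jaccard_score)
# behaved like elementwise accuracy on 1-D binary inputs, which is not the
# same quantity as the set Jaccard shown here.
def _jaccard_distance(a, b):
    both = sum(1 for x, y in zip(a, b) if x and y)   # |A intersect B|
    either = sum(1 for x, y in zip(a, b) if x or y)  # |A union B|
    return 1.0 if either == 0 else 1.0 - both / either

assert _jaccard_distance([1, 1, 0, 0], [1, 0, 1, 0]) == 1.0 - 1 / 3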
from utils import list_from_file

vocab = list_from_file('resource/vocabulary.txt')

files_changed = ['data/corpus_masked_train.txt']
for filename in files_changed:
    data = list_from_file(filename)
    for i in range(len(data)):
        sentence = data[i]
        sequence = sentence.split()
        for j in range(len(sequence)):
            # Replace out-of-vocabulary tokens with the <UNK> marker.
            if sequence[j] not in vocab:
                sequence[j] = '<UNK>'
        sentence = ' '.join(sequence) + '\n'
        data[i] = sentence
    output_filename = filename.split('.')[0] + '_preprocessed.txt'
    with open(output_filename, 'w') as file:
        file.writelines(data)
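# Self-contained illustration of the <UNK> replacement above, using toy data
# (the vocabulary and sentence are made up for the example):
_vocab = {'the', 'cat', 'sat'}
_tokens = [t if t in _vocab else '<UNK>' for t in 'the cat sat quietly'.split()]
assert ' '.join(_tokens) == 'the cat sat <UNK>'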
from src.augmentation import BasicTextAugmentation
from utils import list_from_file
from feature_extraction import FeatureExtractor
from keras.models import load_model
from keras import backend as K
import tensorflow as tf
from keras.preprocessing.text import text_to_word_sequence
import json

# TF1-style session config: grow GPU memory as needed, capped at 50%.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.5
K.tensorflow_backend.set_session(tf.Session(config=config))

ta = BasicTextAugmentation()
vocab = list_from_file('resource/vocabulary.txt')
w2v_pathname = 'resource/w2v_path.txt'
fe = FeatureExtractor(5)
fe.set_w2v(w2v_pathname, 500, keep_alive=True)


class BengioAugmenter:
    def __init__(self, model_filename):
        self.gram = 5
        self.model = load_model(model_filename)

    def prob(self, sequences):
        for i in range(len(sequences)):
            if sequences[i] not in vocab:
                if sequences[i].isdigit():
                    sequences[i] = '<NUM>'
                else:
                    # Assumed completion of the truncated excerpt, mirroring
                    # the <UNK> masking used in preprocessing.
                    sequences[i] = '<UNK>'
        # ... (remainder of the method is truncated in this excerpt)
#!/usr/bin/env python3
import subprocess

import utils

apt_apps: "list[str]" = utils.list_from_file("apt-apps.txt")
apt_ppas: "list[str]" = utils.list_from_file("apt-ppas.txt")


def handle_ppas(ppas: "list[str]"):
    print(f"\nNumber of ppas: {len(ppas)}")
    print(f"List of ppas: {ppas}\n")
    for ppa in ppas:
        subprocess.run(f"add-apt-repository -y {ppa}".split())


def handle_apt_apps(apt_apps: "list[str]"):
    print(f"\nNumber of packages: {len(apt_apps)}")
    print(f"List of packages: {apt_apps}\n")
    subprocess.run("apt list".split() + [*apt_apps])
    print("Installing apps...")
    subprocess.run("apt install -y".split() + [*apt_apps])


subprocess.run("apt update".split())
if apt_ppas:
    # Assumed completion of the truncated excerpt: register PPAs, then install.
    handle_ppas(apt_ppas)
handle_apt_apps(apt_apps)
from src.bengio.augment import BengioAugmenter
from utils import list_from_file
from math import log10

model_name = input('Model: ')
ba = BengioAugmenter('model/' + model_name)

logprob = 0
n_word = 0
val_data = list_from_file('data/corpus_masked_test_preprocessed.txt')
for sentence in val_data:
    print(sentence)
    seq = sentence.split(' ')
    if len(seq) >= 6:
        for i in range(len(seq) - 5):
            n_word += 1
            seq_query = seq[i:i + 6]  # sliding 6-token window: 5-gram context + target
            prob = ba.prob(seq_query)
            logprob += (-1) * log10(prob)

print('Log probability', logprob)
print('Word count', n_word)
perplexity = 10 ** (logprob / n_word)
print('Perplexity', perplexity)
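# Worked example of the perplexity arithmetic above, with toy numbers (log10
# is already imported in this script): if the model assigns probability 0.1 to
# each of 4 predicted words, the accumulated negative log10 probability is 4,
# so perplexity = 10 ** (4 / 4) = 10.
_probs = [0.1, 0.1, 0.1, 0.1]
_logprob = sum(-log10(p) for p in _probs)
assert abs(10 ** (_logprob / len(_probs)) - 10.0) < 1e-9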
    # (Continues from inside a classifier class; imports and the class header
    # sit above this excerpt. The checkpoint call below is an assumed
    # reconstruction: the excerpt began mid-call, and only mode='max' survives
    # from the original.)
    mc = ModelCheckpoint(save_filename, monitor='val_f1',
                         save_best_only=True, mode='max')
    self.model.fit(x_train, y_train, batch_size=32, epochs=max_epoch,
                   validation_data=[x_val, y_val], callbacks=[mc])
    # Reload the best checkpoint, restoring the custom f1 metric.
    self.model = load_model(save_filename, custom_objects={'f1': f1})

    def evaluate(self, x_test, y_test):
        print(self.model.evaluate(x_test, y_test))


aspect_list = list_from_file('resource/aspect.txt')
n_aspect = len(aspect_list)
w2v_pathname = 'resource/w2v_path.txt'
max_length = 50
fe = FeatureExtractor(max_length)
fe.set_w2v(w2v_pathname, 500, keep_alive=True)


def prepare_feature(filename):
    with open(filename) as file:
        data = json.load(file)
    sentences = [datum['sentence'] for datum in data]
    aspects = [datum['aspect'] for datum in data]
    sequences = [text_to_word_sequence(s) for s in sentences]
    # ... (remainder truncated in this excerpt)