def __init__(self, path: str, nSubBins=2, interactive=False):
    self.__path = path
    self.__nSubBins = nSubBins
    self.__timeStepInSeconds = 10.0
    self.scenarioData = ScenarioData(path, self.__timeStepInSeconds)
    self.data = Data(self.scenarioData, self.__nSubBins, self.__timeStepInSeconds)
    self.__fixedData = self.data.getInvariants()
    self.__initialScenarioData = ScenarioData(path, self.__timeStepInSeconds)
    self.__currentTimePeriod = None
    self.__microtypes = dict()  # MicrotypeCollection(self.modeData.data)
    self.__demand = dict()  # Demand()
    self.__choice = dict()  # CollectedChoiceCharacteristics()
    self.__population = Population(self.scenarioData, self.__fixedData)
    self.__distanceBins = DistanceBins()
    self.__timePeriods = TimePeriods()
    self.__tripGeneration = TripGeneration()
    self.__transitionMatrices = TransitionMatrices(self.scenarioData, self.data.getSupply())
    self.__originDestination = OriginDestination(self.__timePeriods, self.__distanceBins,
                                                 self.__population, self.__transitionMatrices,
                                                 self.__fixedData, self.scenarioData)
    self.__externalities = Externalities(self.scenarioData)
    self.__printLoc = stdout
    self.__interactive = interactive
    self.__tolerance = 2e-11
    self.interact = Interact(self, figure=interactive)
    self.readFiles()
    self.initializeAllTimePeriods()
    self.__successful = True
    if interactive:
        self.interact.init()
def get():
    import datetime

    start = datetime.datetime.now()
    task = longtime.delay()
    end = datetime.datetime.now()
    data = Data(message="It's OK, you get me", data=task.id, status=200)
    return data.to_response()
def __init__(self, source: str):
    self.source = source
    assert self.source in ['text', 'json']
    self.data = Data()
    self.models_dir = self.data.cwd / self.MODELS_DIR_NAME
    self.results_dir = self.data.cwd / self.RESULTS_DIR_NAME
    self.test_data_by_kingdom = self.get_data()
def __init__(self):
    self.gaz_file = 'D:\\mygit\\NER_MODEL\\data\\data\\ctb.50d.vec'
    self.char_emb = 'D:\\mygit\\NER_MODEL\\data\\data\\gigaword_chn.all.a2b.uni.ite50.vec'
    self.train_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.train.char'
    self.dev_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.dev.char'
    self.test_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.test.char'
    self.model_save_path = 'D:\\mygit\\NER_MODEL\\models\\ckpt'
    self.batch_size = 64
    self.max_char_len = 100
    self.emb_size = 50
    self.max_lexicon_words_num = 5
    self.num_units = 128
    self.num_tags = 18
    self.learning_rate = 0.005
    self.optimizer = 'adam'
    self.epoch = 0
    self.bichar_emb = None
    self.data = Data()
    self.load_data_and_embedding()
    self.model = Model_Lattice(self.max_char_len, self.emb_size, self.max_lexicon_words_num,
                               self.num_units, self.num_tags, self.learning_rate)
    self.saver = tf.train.Saver()
def train():
    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[2]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()
    camel.train(dataset, x, y, rho=1, alpha=0.1, alpha_ban=0.5, lam2=0.1)
def __init__(self, n_additional_items_by_bioconcept):
    self.n_additional_items_by_bioconcept = n_additional_items_by_bioconcept
    self.data = Data()
    self.kaggle_data = KaggleData()
    self.vanilla_nlp = spacy.load('en_core_web_sm')
    self.models_dir = self.data.cwd / SpacyDeepModel.MODELS_DIR
    n_iters_by_bioconcept_path = self.data.dict_dir / self.N_ITER_FILE_NAME
    self.n_iters_by_bioconcept = self.data.load_json(n_iters_by_bioconcept_path)
def __init__(self, bioconcept):
    self.bioconcept = bioconcept
    self.kingdom = [
        kingdom for kingdom, bioconcepts in Data.BIOCONCEPTS_BY_KINGDOM.items()
        if bioconcept in bioconcepts
    ][0]
    self.data = Data()
def __init__(self, time_to_sleep=0.5):
    self.time_to_sleep = time_to_sleep
    self.token = os.getenv('EPPO_TOKEN', '')
    self.data = Data()
    self.nouns_not_in_eppo_path = self.data.dict_dir / self.NOUNS_NOT_IN_EPPO_FILE_NAME
    self.nouns_not_in_eppo = self.data.load_json(self.nouns_not_in_eppo_path)
    self.entity_taxonomies_by_bioconcept_path = self.data.dict_dir / self.OUTPUT_FILE_NAME
class SimpleLearning:
    def __init__(self):
        self.data = Data()
        self.entities_by_bioconcept = self.data.learn_training_entries()

    def fit_to_validation(self):
        output_json = {'result': []}
        results = []
        for kingdom in ['animal', 'plant']:
            validation_data = self.data.read_json(kingdom, 'validation')
            for item in validation_data['result']:
                if 'content' not in item['example'].keys():
                    continue
                text = item['example']['content'].lower()
                output_item = {
                    'example': item['example'],
                    'results': {'annotations': [], 'classifications': []}
                }
                for bioconcept in Data.BIOCONCEPTS_BY_KINGDOM[kingdom]:
                    for entity in self.entities_by_bioconcept[bioconcept]:
                        start = text.find(entity)
                        if start != -1:
                            end = start + len(entity)
                            annotation = {'tag': bioconcept, 'start': start, 'end': end}
                            output_item['results']['annotations'].append(annotation)
                result = {
                    'text': text,
                    'true': item['results']['annotations'],
                    'pred': output_item['results']['annotations'],
                }
                results.append(result)
                output_json['result'].append(output_item)
        return results, output_json

    def represent(self):
        items, _ = self.fit_to_validation()
        for item in items:
            text = item['text']
            output_text = item['text']
            print(f'{text}\n')
            for annotation in item['pred']:
                print(f'{annotation}\n')
                named_entity = f"{text[annotation['start']:annotation['end']]}"
                print(f'{named_entity}\n')
                coloured_entity = f"{Fore.RED}{named_entity}{Style.RESET_ALL}"
                output_text = output_text.replace(named_entity, coloured_entity)
            print(f'{output_text}\n --------------------------------------')
            input()
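# Hypothetical usage sketch of the SimpleLearning class above (not part of the
# original snippet): run the dictionary-matching baseline on the validation set,
# then step through the colour-highlighted predictions interactively.
learner = SimpleLearning()
results, output_json = learner.fit_to_validation()
learner.represent()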
def test_data():
    time0 = time.time()
    data = Data(42)
    time1 = time.time()
    assert data.value == 42
    assert time0 <= data.last_update <= time1
    data.value = 24
    time2 = time.time()
    assert data.value == 24
    assert time1 <= data.last_update <= time2
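# A minimal sketch (an assumption, not the project's actual implementation) of a
# Data class that the test above implies: a `value` property whose setter also
# refreshes `last_update`. Names are taken from the test; the body is illustrative.
import time


class Data:
    def __init__(self, value):
        self._value = value
        self.last_update = time.time()

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        # Record the modification time whenever the value changes.
        self._value = new_value
        self.last_update = time.time()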
def post():
    token = request.cookies.get('jwt', request.headers.get('Authorization', 'a.b.c'))
    secret_key = current_app.config['SECRET_KEY']
    user_info = jwt.decode(token, secret_key)
    username = user_info['user']
    key = get_key_to_hash('login', username=username)
    data = Data(message='logout success', status=200).to_response()
    data.delete_cookie('jwt')
    sentinel.master.delete(key)
    return data
def train_image():
    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[3]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()
    x_train = x[0:1800]
    y_train = y[0:1800]
    x_test = x[1800:2000]
    y_test = y[1800:2000]
    camel_GPU.train_image(dataset, x_train, y_train, x_test, y_test,
                          rho=1, alpha=0.1, alpha_ban=0.5, lam2=0.1)
def get():
    try:
        limit = int(request.args['limit'])
    except BadRequestKeyError:
        limit = 100
    except ValueError:
        data = Data(message='Invalid value of limit', status=422)
        return data.to_response()
    try:
        offset = int(request.args['offset'])
    except BadRequestKeyError:
        offset = 0
    except ValueError:
        data = Data(message='Invalid value of offset', status=422)
        return data.to_response()
    results = Problem.query.order_by(Problem.id).filter_by(visible=True).limit(limit).offset(offset).all()
    problem_list = []
    for item in results:
        problem = {
            "id": item.id,
            "title": item.title,
            "source": item.source,
            "submit_number": item.submit_number,
            "accepted_number": item.accepted_number
        }
        if item.submit_number == 0 or item.submit_number is None:
            problem['ac_rate'] = 0
        else:
            problem['ac_rate'] = item.accepted_number / item.submit_number
        problem_list.append(problem)
    data = Data(data=problem_list, status=200)
    return data.to_response()
def get():
    try:
        limit = int(request.args['limit'])
    except BadRequestKeyError:
        limit = 100
    except ValueError:
        data = Data(message='Invalid value of limit', status=422)
        return data.to_response()
    try:
        offset = int(request.args['offset'])
    except BadRequestKeyError:
        offset = 0
    except ValueError:
        data = Data(message='Invalid value of offset', status=422)
        return data.to_response()
    results = Contest.query.order_by(Contest.id).filter_by(visible=True).limit(limit).offset(offset).all()
    contest_list = []
    for item in results:
        contest = {
            "id": item.id,
            "title": item.title,
            "start_time": item.start_time,
            "end_time": item.end_time,
            "type": item.permission_type
        }
        now = int(time.time())
        if contest['end_time'] is None or now > contest['end_time']:
            contest['status'] = 'end'
        else:
            contest['status'] = 'running'
        contest_list.append(contest)
    data = Data(data=contest_list, status=200)
    return data.to_response()
def __init__(self):
    self.data = Data()
    self.entities_by_bioconcept = self.data.learn_training_entries()
    self.nlp = spacy.load(self.MODEL)
    self.eppo = Eppo(time_to_sleep=0.1)
    self.taxonomies_by_bioconcept = self.get_taxonomies_by_bioconcept()
    self.accepted_eppo_nouns_path = self.data.dict_dir / self.ACCEPTED_EPPO_NOUNS_FILE_NAME
    self.accepted_eppo_nouns_by_bioconcept = self.data.load_json(self.accepted_eppo_nouns_path)
    self.not_accepted_eppo_nouns_path = self.data.dict_dir / self.NOT_ACCEPTED_EPPO_NOUNS_FILE_NAME
    self.not_accepted_eppo_nouns_by_bioconcept = self.data.load_json(self.not_accepted_eppo_nouns_path)
    self.output_path = self.data.dict_dir / self.OUTPUT_FILE_NAME
def __init__(self):
    # print("Python Version: %s.%s" % (sys.version_info[0], sys.version_info[1]))
    # print("PyTorch Version: %s" % (torch.__version__))
    # print("Process ID: ", os.getpid())
    self.data = Data()
    self.data.HP_gpu = torch.cuda.is_available()
    if self.data.HP_gpu:
        self.data.device = 'cuda'
    # print("GPU:", self.data.HP_gpu, "; device:", self.data.device)
    self.optimizer = None
    self.model = None
def forward(self, x) -> Data:
    x = self.base(x)
    feat_t = self.GAP(x).view(x.size(0), -1)  # global feature for triplet loss
    feat_c = self.bottleneck(feat_t)  # normalize for angular softmax
    data = Data()
    data.feat_t = feat_t
    data.feat_c = feat_c
    if self.training:
        cls_score = self.classifier(feat_c)
        data.cls_score = cls_score
    return data
def __init__(self, server_mac, port=3):
    """
    Initializer for a bluetooth connection, either as slave or master.

    :param server_mac: Address of the device which to connect to.
    :param port: The port via which to connect to the device.
    """
    self.server_mac = server_mac
    self.port = port
    self.database = Data()
    self.socket = None
    self.out_sock = None
    self.in_sock = None
def main():
    args = get_args()
    conf = __import__("config." + args.config, globals(), locals(), ["Conf"]).Conf
    helper = Helper(conf=conf)
    data = Data(conf)
    data.load_data()  # you need to setup: data.train_loader/data.test_loader
    model = Model(conf).to(conf.device)
    print(model)
    training(conf, model, data.train_loader)
def train_val():
    # trade-off parameters
    rho_list = [1]
    alpha_list = cp.arange(0, 1.1, 0.1)
    alpha_ban_list = cp.arange(0, 1.1, 0.1)
    lam2_list = cp.array([0.001, 0.002, 0.01, 0.02, 0.1, 0.2, 1])
    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[2]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()
    camel_GPU.train_val(dataset, x, y, rho_list, alpha_list, alpha_ban_list, lam2_list)
def spider() -> Data:
    # Call the requester to fetch the HTML; read utils.requester first before continuing.
    html = request(index_url)
    if html is None:
        print('Requesting ' + index_url + ' failed.')
        return
    # Build a BeautifulSoup object from the fetched HTML; see the official BeautifulSoup
    # documentation for usage. from_encoding='utf-8' tells the parser which character
    # encoding the page uses.
    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
    # For the BeautifulSoup calls below, read them alongside the page source of the
    # WHU news site and the BeautifulSoup documentation.
    div_list = soup.find_all('div', class_='infotitle')
    # Create the Data object; read utils.data first before continuing.
    data = Data('武大要闻')
    # Iterate over the collected article URLs.
    for url in [base_url + div.a['href'] for div in div_list if div.a is not None]:
        html = request(url)
        if html is None:
            print('Requesting ' + url + ' failed.')
            continue
        soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
        title = soup.find('div', class_='news_title').string
        attrib = soup.find('div', class_='news_attrib').string
        content = soup.find('div', class_='v_news_content').get_text()
        # Add an entry to data. This uses Python's datetime module (see the official
        # docs) and regular expressions to parse the publish time and the source field.
        data.add({
            'title': title.strip(),
            'date': datetime.fromisoformat(
                re.search(r'发布时间:(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2})', attrib).group(1)
            ).isoformat(timespec='seconds'),
            'from': re.search(r'来源:(\S*)', attrib).group(1),
            'content': content.strip(),
            'source_url': url
        })
    return data
def __init__(self, max_char_len, emb_size, max_lexicon_words_num, num_units, num_tags, learning_rate):
    self.batch_size = 64
    self.max_char_len = max_char_len
    self.emb_size = emb_size
    self.max_lexicon_words_num = max_lexicon_words_num
    self.num_units = num_units
    self.num_tags = num_tags
    self.learning_rate = learning_rate
    self.optimizer = 'adam'
    self.clip = 5
    self.data = Data()
    self.data.build_word_pretrain_emb(
        'D:\\mygit\\NER_MODEL\\data\\data\\gigaword_chn.all.a2b.uni.ite50.vec')
    self.data.build_gaz_pretrain_emb('D:\\mygit\\NER_MODEL\\data\\data\\ctb.50d.vec')

    def my_filter_callable(tensor):
        # A filter that detects zero-valued scalars.
        return len(tensor.shape) == 0 and tensor == 0.0

    self.sess = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
    self.sess.add_tensor_filter('my_filter', my_filter_callable)
    self.sess = tf.Session()  # note: this replaces the debug wrapper session created above
    self.placeholders = {}
    self.epoch = 0
    self.global_step = tf.Variable(0, trainable=False)
    self.char_ids = tf.placeholder(tf.int32, [None, self.max_char_len])
    self.lexicon_word_ids = tf.placeholder(
        tf.int32, [None, self.max_char_len, self.max_lexicon_words_num])
    self.word_length_tensor = tf.placeholder(
        tf.float32, [None, self.max_char_len, self.max_lexicon_words_num])
    self.labels = tf.placeholder(tf.int32, [None, self.max_char_len])
    self.lexicon_word_ids_reshape = tf.reshape(
        self.lexicon_word_ids, [-1, self.max_char_len * self.max_lexicon_words_num])
    self.seq_length = tf.convert_to_tensor(self.batch_size * [self.max_char_len], dtype=tf.int32)
    self.placeholders["char_ids"] = self.char_ids
    self.placeholders["lexicon_word_ids"] = self.lexicon_word_ids
    self.placeholders["word_length_tensor"] = self.word_length_tensor
    self.placeholders["labels"] = self.labels
    self.create_embedding()
    self.create_declare()
    self.create_model()
    self.create_loss()
def __init__(self):
    self.data = Data()
    self.file_path = self.data.cwd / self.FILE_NAME
    self.df = self.load_df()
    grouped = self.df.groupby("Sentence #").apply(self.group_function)
    self.sentences = [sentence for sentence in grouped]
    np.random.shuffle(self.sentences)
def int8(self):
    address = self.tell()
    self.offset += 1
    size = self.offset - address
    return Data(struct.unpack('B', self.data[address:self.offset])[0], address, size)
def uint16(self):
    address = self.tell()
    self.offset += 2
    size = self.offset - address
    return Data(struct.unpack('H', self.data[address:self.offset])[0], address, size)
def single(self):
    address = self.tell()
    self.offset += 4
    size = self.offset - address
    return Data(struct.unpack('f', self.data[address:self.offset])[0], address, size)
def double(self):
    address = self.tell()
    self.offset += 8
    size = self.offset - address
    return Data(struct.unpack('d', self.data[address:self.offset])[0], address, size)
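# The four readers above share one pattern: record the current address, advance
# self.offset by the field width, then wrap the struct-unpacked value in
# Data(value, address, size). A hypothetical uint32 reader following the same
# assumed class layout (self.tell(), self.offset, self.data) is sketched below;
# it is illustrative and not part of the original reader class.
def uint32(self):
    address = self.tell()
    self.offset += 4
    size = self.offset - address
    return Data(struct.unpack('I', self.data[address:self.offset])[0], address, size)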
def __init__(self, ip_port):
    """
    :param ip_port: ai-nlp-basic ip:port
    """
    configs = self.read_configs(1)
    dset_path, model_path = configs['alphabet_path'], configs['model_path']
    self.data = Data('', '', False)
    # The AC tree has to be built for the inference stage.
    self.data.trees = Trees.build_trees(configs['specific_words_file'])
    with open(dset_path, 'rb') as rbf:
        self.data.char_alphabet.instance2index = pickle.load(rbf)  # keep_growing: False
        self.data.intent_alphabet.instance2index = pickle.load(rbf)
        self.data.label_alphabet.instance2index = pickle.load(rbf)
        self.data.label_alphabet.instances = pickle.load(rbf)
        self.data.char_alphabet_size = pickle.load(rbf)
        self.data.intent_alphabet_size = pickle.load(rbf)
        self.data.label_alphabet_size = pickle.load(rbf)
    self.model = BilstmCrf(self.data, configs)
    self.model.load_state_dict(torch.load(model_path, map_location=configs['map_location']))
    self.model.eval()
    self.model.to(configs['device'])
    self.gpu = configs['gpu']
    self.char_max_length = configs['char_max_length']
    self.channel = grpc.insecure_channel(ip_port)
    self.stub = nlp_basic_pb2_grpc.NLPBasicServerStub(self.channel)
def evaluate(self, save_name=None):
    if save_name is None:
        model = self.model.cuda()
    else:
        save_path = "%s%s.mdl" % (PathConfig.MODEL_PATH, save_name)
        print("Loading model from %s..." % save_path)
        model = torch.load(save_path).cuda()
        print("Loaded.\n")
    total = ceil(len(self.data["test"][0]) / NetworkConfig.BATCH_SIZE) * NetworkConfig.BATCH_SIZE
    correct_count = 0
    bar = ProgressBar(total, "Testing model...")
    progress = 0
    for batch in Data.batches(self.data["test"]):
        X, Y = batch[0], batch[1]
        output = model(X)
        output = numpy.round(output.cpu().detach().numpy() * 5)
        Y = numpy.round(numpy.array(Y) * 5)
        correct_count += numpy.sum(output == Y)
        progress += NetworkConfig.BATCH_SIZE
        bar.refresh(progress)
    bar.finish("Accuracy: %f" % (correct_count / total))
def value():
    data_list = []
    data = Data.get_json_data("bnal.json")
    for i in data:
        data_list.append((i.get("name"), i.get("password"), i.get("exp"),
                          i.get("tag"), i.get("desc")))
    return data_list
def stability(data_filename, skill, cluster_number=3, method="hierarchical_concepts",
              runs=10, sample_size=0.7, plot=True):
    results = []
    for i in range(runs):
        data = Data(data_filename, train_seed=i, train_size=sample_size)
        if method == "spectral_items":
            data.only_first()
            results.append(item_clustering(data, skill, cluster_number=cluster_number, plot=False))
        if method == "spectral_concepts":
            results.append(concept_clustering(data, skill, cluster_number=cluster_number, plot=False))
        if method == "hierarchical_concepts":
            results.append(hierarchical_clustering(
                data, skill, concepts=True, cluster_number=cluster_number,
                corr_as_vectors=False, method="complete", dendrogram=False,
            ))
        if method == "hierarchical_concepts_vectors":
            results.append(hierarchical_clustering(
                data, skill, concepts=True, cluster_number=cluster_number,
                corr_as_vectors=True, method="ward", dendrogram=False,
            ))

    values = []
    for l1, l2 in combinations(results, 2):
        values.append(v_measure_score(l1, l2))

    if plot:
        plt.title("{} - {} ".format(method, skill))
        plt.hist(values)

    return sum(values) / len(values)
def loadDataFile(self, name):
    """
    Load the data file passed as a parameter. This function is called when
    an existing project is opened and when the data file is chosen for a
    new project.
    """
    log(2, "Loading datafile '%s'" % name)
    ## dos-to-unix conversion
    # if not isUnixText(name):
    #     dos2unix(name)
    new_data = Data(name)
    try:
        new_data.loadfromfile()
        # keep the data only if the load did not raise an exception
        self.data = new_data
        microsat = ""
        sequences = ""
        et = ""
        if self.data.nloc_mic > 0:
            microsat = "%s microsat" % self.data.nloc_mic
        if self.data.nloc_seq > 0:
            sequences = "%s sequences" % self.data.nloc_seq
        if self.data.nloc_mic > 0 and self.data.nloc_seq > 0:
            et = " and "
        self.ui.dataFileInfoLabel.setText(
            "%s loci (%s%s%s)\n%s individuals in %s samples"
            % (self.data.nloc, microsat, et, sequences, self.data.nindtot, self.data.nsample)
        )
        self.ui.dataFileEdit.setText(name)
        self.dataFileSource = name
    except Exception as e:
        log(1, traceback.format_exc())
        keep = ""
        if self.ui.dataFileEdit.text() != "":
            keep = "\n\nThe file was not loaded, nothing was changed"
        output.notify(self, "Data file error", "%s%s" % (e, keep))
        return False
    return True
plt.subplot(221)
plt.title("Correlations of difficulties")
sns.heatmap(results_d)
plt.subplot(222)
plt.title("Correlations of skills")
sns.heatmap(results_s)
plt.subplot(223)
plt.title("Correlations of predictions")
sns.heatmap(results_p)
plt.subplot(224)
sns.barplot(x="labels", y="rmse", data=df)

model_flat = lambda label: EloPriorCurrentModel(KC=2, KI=0.5)
model_hierarchical = lambda label: EloHierarchicalModel(KC=1, KI=0.75, alpha=0.8, beta=0.02)

data = Data("../data/matmat/2015-12-16/answers.pd")
data_time_2 = Data("../data/matmat/2015-12-16/answers.pd",
                   response_modification=TimeLimitResponseModificator([(5, 0.5)]))
data_time_2b = Data("../data/matmat/2015-12-16/answers.pd",
                    response_modification=TimeLimitResponseModificator([(7, 0.5)]))

value = 0.5
single_step_time_split = {
    limit: (
        lambda limit: Data("../data/matmat/2015-12-16/answers.pd",
                           response_modification=TimeLimitResponseModificator([(limit, value)])),
        model_flat
    )
    for limit in range(1, 15)
}
# compare_more_models(single_step_time_split)

limit = 5
single_step_time_split_value = {
print("GPU available:", gpu)
print("Status:", status)
print("Seg: ", seg)
print("Train file:", train_file)
print("Dev file:", dev_file)
print("Test file:", test_file)
print("Raw file:", raw_file)
print("Char emb:", char_emb)
print("Bichar emb:", bichar_emb)
print("Gaz file:", gaz_file)
if status == 'train':
    print("Model saved to:", save_model_dir)
sys.stdout.flush()

if status == 'train':
    data = Data()
    data.HP_gpu = gpu
    data.HP_use_char = False
    data.HP_batch_size = 10
    data.use_bigram = False
    data.gaz_dropout = 0.5
    data.norm_gaz_emb = False
    data.HP_fix_gaz_emb = False
    data_initialization(data, gaz_file, train_file, dev_file, test_file)
    data.generate_instance_with_gaz(train_file, 'train')
    data.generate_instance_with_gaz(dev_file, 'dev')
    data.generate_instance_with_gaz(test_file, 'test')
    data.build_word_pretrain_emb(char_emb)
    data.build_biword_pretrain_emb(bichar_emb)
    students["AB group"] = ""
    for i, name in ((14, "50%"), (15, "35%"), (16, "20%"), (17, "5%"),):
        students["AB group"][groups == i] = name
    g = sns.PairGrid(students, hue="AB group", hue_order=["5%", "20%", "35%", "50%"])
    g = g.map_diag(sns.distplot)
    g = g.map_upper(plt.scatter)
    g = g.map_lower(sns.kdeplot)
    g = g.add_legend()
else:
    g = sns.PairGrid(students)
    g = g.map_diag(plt.hist)
    g = g.map_upper(plt.scatter, marker=".")
    g = g.map_lower(sns.kdeplot, shade=False)

data = Data("../data/matmat/2016-06-27/answers.pd")
# data = Data("../data/slepemapy/2016-ab-target-difficulty/answers.pd")
data.filter_data(10, 10)
# response_times(data, time_dist=True, mean_times_dist=False)
# answer_count(data, per_student=False, per_item=True, student_drop_off=False)
# success_rate(data, per_student=False)
# print(data.get_items_df().count())
pair_grid(data)
plt.show()

if False:
    model = EloPriorCurrentModel(KC=2, KI=0.5)
    # model = EloHierarchicalModel(KC=1, KI=0.75, alpha=0.8, beta=0.02)
    ["Basic model + noTime", "Basic model + thresholdTime", "Basic model + expTime", "Basic model + linearTime"],
    10, runs=5,
    # eval_data=data_test
).to_pickle("Diff_times_H.pd")

if False:
    df = pd.read_pickle("Diff_times.pd")
    # df = pd.read_pickle("Diff_models.pd")
    df["answers"] = ((df["answers"] / 15000).round() * 15).astype(int)
    print(df)
    # g = sns.factorplot(x="answers", y="correlation", hue="models", hue_order=['Basic model', 'Item Average', 'Concept model', 'Hierarchical model'], data=df, markers=["o", "^", "v", "s", "D"])
    g = sns.factorplot(x="answers", y="correlation", hue="models", data=df, markers=["o", "^", "v", "s", "D"])
    g.set_xlabels("Thousands of answers in the train set")
    g.set_ylabels("Correlation of parameters")
    g.set(ylim=(0, 1))

if False:
    data = Data(filename, train_size=0.7)
    data.get_dataframe_train().to_pickle(filename.replace(".pd", ".train.pd"))
    data.get_dataframe_test().to_pickle(filename.replace(".pd", ".test.pd"))

if False:
    d = data(None)
    df = d.get_dataframe_all()
    items = d.get_items_df()
    df.to_pickle(filename.replace(".pd", ".less_items.pd"))

plt.show()
    if data.seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
              % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" % (name, time_cost, speed, acc))
    return pred_results, pred_scores


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Tuning with NCRF++')
    # parser.add_argument('--status', choices=['train', 'decode'], help='update algorithm', default='train')
    parser.add_argument('--config', help='Configuration File')
    args = parser.parse_args()

    data = Data()
    data.HP_gpu = torch.cuda.is_available()
    data.read_config(args.config)
    status = data.status.lower()
    print("Seed num:", seed_num)

    if status == 'train':
        print("MODEL: train")
        data_initialization(data)
        data.generate_instance('train')
        data.generate_instance('dev')
        data.generate_instance('test')
        data.build_pretrain_emb()
        train(data)
    elif status == 'decode':
        print("MODEL: decode")
from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

TRASHOLD = 25

d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()
items = items.join(skills, on="skill_lvl_1")
concepts = items["name"].unique()

sts = {}
for concept in concepts:
    print(concept)
    its = list(items[items["name"] == concept].index)
    students = answers[answers["item"].isin(its)].groupby("student").size()
    students = students[students >= TRASHOLD]
    print(len(students))
    sts[concept] = students

data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)
for concept1 in concepts:
    for concept2 in concepts:
        count = len(set(sts[concept1]) & set(sts[concept2]))
        print(concept1, concept2, count)
        data[concept1][concept2] = count
from matmat.experiments_clustering import all_in_one
from models.eloPriorCurrent import EloPriorCurrentModel
from utils import utils
from utils.data import Data
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

from utils.evaluator import Evaluator

data = Data("../../data/matmat/2016-06-27/answers.pd", train_size=1)
data.trim_times()
answers = data.get_dataframe_all()


def grid(data, model):
    utils.grid_search(data, model,
                      {"KC": 3, "KI": 0.5}, {
                      # {"alpha": 0.25, "beta": 0.02}, {
                          "alpha": np.arange(0.4, 1.7, 0.2),
                          "beta": np.arange(0., 0.2, 0.02),
                          # "KC": np.arange(1.5, 5.0, 0.25),
                          # "KI": np.arange(0, 2.5, 0.25),
                      # }, plot_axes=["KC", "KI"])
                      }, plot_axes=["alpha", "beta"])
    plt.show()


items = data.get_items_df()
items = items[(items["skill_lvl_2"] == 210) & ~items["skill_lvl_3"].isnull()].loc[:, ("question", "answer", "visualization")]
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Tuning with NCRF++')
    parser.add_argument('--wordemb', help='Embedding for words', default='None')
    parser.add_argument('--charemb', help='Embedding for chars', default='None')
    parser.add_argument('--status', choices=['train', 'decode'], help='update algorithm', default='train')
    parser.add_argument('--savemodel', default="data/model/saved_model.lstmcrf.")
    parser.add_argument('--savedset', help='Dir of saved data setting')
    parser.add_argument('--train', default="data/conll03/train.bmes")
    parser.add_argument('--dev', default="data/conll03/dev.bmes")
    parser.add_argument('--test', default="data/conll03/test.bmes")
    parser.add_argument('--seg', default="True")
    parser.add_argument('--raw')
    parser.add_argument('--loadmodel')
    parser.add_argument('--output')
    args = parser.parse_args()

    data = Data()
    data.train_dir = args.train
    data.dev_dir = args.dev
    data.test_dir = args.test
    data.model_dir = args.savemodel
    data.dset_dir = args.savedset
    print("aaa", data.dset_dir)
    status = args.status.lower()
    save_model_dir = args.savemodel
    data.HP_gpu = torch.cuda.is_available()
    print("Seed num:", seed_num)
    data.number_normalized = True
    data.word_emb_dir = "../data/glove.6B.100d.txt"

    if status == 'train':
        data,
        lambda l: Data(filename, response_modification=TimeLimitResponseModificator([(7, 0.5)])),
        lambda l: Data(filename, response_modification=ExpDrop(5, 0.9)),
        lambda l: Data(filename, response_modification=LinearDrop(14)),
    ],
    [basic_model, basic_model, basic_model, basic_model],
    ["Basic model + noTime", "Basic model + thresholdTime", "Basic model + expTime", "Basic model + linearTime"],
    10, runs=5, data_ratio=ratio,
    # eval_data=data_test
)

if 1:
    ratio = 1
    model1 = basic_model(None)
    model2 = basic_model(None)
    data1 = Data(filename, train_size=ratio)
    median = data1.get_dataframe_all()['response_time'].median()
    print('time median', median)
    data2 = Data(filename, response_modification=LinearDrop(median * 2), train_size=ratio)
    # data2 = Data(filename, response_modification=TimeLimitResponseModificator([(median, 0.5)]), train_size=ratio)
    # data2 = Data(filename, response_modification=ExpDrop(median / 2, 0.9), train_size=ratio)
    Runner(data1, model1).run(force=True, only_train=True)
    Runner(data2, model2).run(force=True, only_train=True)
    items_ids = data1.get_items()
    items_ids = list(items_in_concept(data(None), 'division'))
    v1 = model1.get_difficulties(items_ids)
    v2 = model2.get_difficulties(items_ids)
    for item, x, y in zip(items_ids, v1, v2):
def get_requirement_data(self):
    """Use the utils Data module to load the YAML file and return all of its values."""
    data = Data(self.filename)
    return data.get_all_value()