Example #1
    def __init__(self, path: str, nSubBins=2, interactive=False):
        self.__path = path
        self.__nSubBins = nSubBins
        self.__timeStepInSeconds = 10.0
        self.scenarioData = ScenarioData(path, self.__timeStepInSeconds)
        self.data = Data(self.scenarioData, self.__nSubBins, self.__timeStepInSeconds)
        self.__fixedData = self.data.getInvariants()

        self.__initialScenarioData = ScenarioData(path, self.__timeStepInSeconds)
        self.__currentTimePeriod = None
        self.__microtypes = dict()  # MicrotypeCollection(self.modeData.data)
        self.__demand = dict()  # Demand()
        self.__choice = dict()  # CollectedChoiceCharacteristics()
        self.__population = Population(self.scenarioData, self.__fixedData)
        self.__distanceBins = DistanceBins()
        self.__timePeriods = TimePeriods()
        self.__tripGeneration = TripGeneration()
        self.__transitionMatrices = TransitionMatrices(self.scenarioData, self.data.getSupply())
        self.__originDestination = OriginDestination(self.__timePeriods, self.__distanceBins, self.__population,
                                                     self.__transitionMatrices, self.__fixedData,
                                                     self.scenarioData)
        self.__externalities = Externalities(self.scenarioData)
        self.__printLoc = stdout
        self.__interactive = interactive
        self.__tolerance = 2e-11
        self.interact = Interact(self, figure=interactive)
        self.readFiles()
        self.initializeAllTimePeriods()
        self.__successful = True
        if interactive:
            self.interact.init()
Example #2
 def get():
     import datetime
     start = datetime.datetime.now()
     task = longtime.delay()
     end = datetime.datetime.now()
     data = Data(message="It's OK, you get me", data=task.id, status=200)
     return data.to_response()
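Examples #2, #11, #13, and #14 all funnel their payload through Data(...).to_response(), and #11 even calls delete_cookie() on the result, so to_response() must return a real Flask response object. The project's own class is not shown in this listing; the following is only a minimal sketch of one plausible implementation (class shape and JSON field names are assumptions, not taken from the source):

from flask import jsonify


class Data:
    """Uniform API payload: message + data + HTTP status (sketch)."""

    def __init__(self, message='', data=None, status=200):
        self.message = message
        self.data = data
        self.status = status

    def to_response(self):
        # jsonify returns a flask.Response, so callers can delete cookies etc.
        response = jsonify({'message': self.message, 'data': self.data})
        response.status_code = self.status
        return response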
Example #3
 def __init__(self, source: str):
     self.source = source
     assert self.source in ['text', 'json']
     self.data = Data()
     self.models_dir = self.data.cwd / self.MODELS_DIR_NAME
     self.results_dir = self.data.cwd / self.RESULTS_DIR_NAME
     self.test_data_by_kingdom = self.get_data()
Example #4
    def __init__(self):

        self.gaz_file = 'D:\\mygit\\NER_MODEL\\data\\data\\ctb.50d.vec'
        self.char_emb = 'D:\\mygit\\NER_MODEL\\data\\data\\gigaword_chn.all.a2b.uni.ite50.vec'
        self.train_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.train.char'
        self.dev_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.dev.char'
        self.test_file = 'D:\\mygit\\NER_MODEL\\data\\data\\demo.test.char'
        self.model_save_path = 'D:\\mygit\\NER_MODEL\\models\\ckpt'

        self.batch_size = 64
        self.max_char_len = 100
        self.emb_size = 50
        self.max_lexicon_words_num = 5
        self.num_units = 128
        self.num_tags = 18
        self.learning_rate = 0.005
        self.optimizer = 'adam'
        self.epoch = 0
        self.bichar_emb = None
        self.data = Data()
        self.load_data_and_embedding()
        self.model = Model_Lattice(self.max_char_len, self.emb_size,
                                   self.max_lexicon_words_num, self.num_units,
                                   self.num_tags, self.learning_rate)
        self.saver = tf.train.Saver()
Example #5
def train():
    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[2]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()

    camel.train(dataset, x, y, rho=1, alpha=0.1, alpha_ban=0.5, lam2=0.1)
Example #6
 def __init__(self, n_additional_items_by_bioconcept):
     self.n_additional_items_by_bioconcept = n_additional_items_by_bioconcept
     self.data = Data()
     self.kaggle_data = KaggleData()
     self.vanilla_nlp = spacy.load('en_core_web_sm')
     self.models_dir = self.data.cwd / SpacyDeepModel.MODELS_DIR
     n_iters_by_bioconcept_path = self.data.dict_dir / self.N_ITER_FILE_NAME
     self.n_iters_by_bioconcept = self.data.load_json(n_iters_by_bioconcept_path)
Example #7
 def __init__(self, bioconcept):
     self.bioconcept = bioconcept
     self.kingdom = [
         kingdom
         for kingdom, bioconcepts in Data.BIOCONCEPTS_BY_KINGDOM.items()
         if bioconcept in bioconcepts
     ][0]
     self.data = Data()
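A note on the kingdom lookup above: taking [0] of the filtered list comprehension raises IndexError when the bioconcept belongs to no kingdom. An equivalent next()-based form stops at the first match and is arguably clearer; a self-contained illustration (the dictionary contents here are made-up placeholders):

BIOCONCEPTS_BY_KINGDOM = {'plant': ['PLANT_PEST'], 'animal': ['ANIMAL_DISEASE']}
bioconcept = 'PLANT_PEST'
kingdom = next(
    k for k, bioconcepts in BIOCONCEPTS_BY_KINGDOM.items()
    if bioconcept in bioconcepts
)
print(kingdom)  # -> 'plant'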
Example #8
 def __init__(self, time_to_sleep=0.5):
     self.time_to_sleep = time_to_sleep
     self.token = os.getenv('EPPO_TOKEN', '')
     self.data = Data()
     self.nouns_not_in_eppo_path = self.data.dict_dir / self.NOUNS_NOT_IN_EPPO_FILE_NAME
     self.nouns_not_in_eppo = self.data.load_json(
         self.nouns_not_in_eppo_path)
     self.entity_taxonomies_by_bioconcept_path = self.data.dict_dir / self.OUTPUT_FILE_NAME
Example #9
class SimpleLearning:
    def __init__(self):
        self.data = Data()
        self.entities_by_bioconcept = self.data.learn_training_entries()

    def fit_to_validation(self):
        output_json = {'result': []}
        results = []
        for kingdom in ['animal', 'plant']:
            validation_data = self.data.read_json(kingdom, 'validation')
            for item in validation_data['result']:
                if 'content' not in item['example'].keys():
                    continue
                text = item['example']['content'].lower()
                output_item = {
                    'example': item['example'],
                    'results': {
                        'annotations': [],
                        'classifications': []
                    }
                }
                for bioconcept in Data.BIOCONCEPTS_BY_KINGDOM[kingdom]:
                    for entity in self.entities_by_bioconcept[bioconcept]:
                        start = text.find(entity)
                        if start != -1:
                            end = start + len(entity)
                            annotation = {
                                'tag': bioconcept,
                                'start': start,
                                'end': end
                            }
                            output_item['results']['annotations'].append(
                                annotation)
                result = {
                    'text': text,
                    'true': item['results']['annotations'],
                    'pred': output_item['results']['annotations']
                }
                results.append(result)
                output_json['result'].append(output_item)
        return results, output_json

    def represent(self):
        items, _ = self.fit_to_validation()
        for item in items:
            text = item['text']
            output_text = item['text']
            print(f'{text}\n')
            for annotation in item['pred']:
                print(f'{annotation}\n')
                named_entity = f"{text[annotation['start']:annotation['end']]}"
                print(f'{named_entity}\n')
                coloured_entity = f"{Fore.RED}{named_entity}{Style.RESET_ALL}"
                output_text = output_text.replace(named_entity,
                                                  coloured_entity)
                print(
                    f'{output_text}\n --------------------------------------')
                input()
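The matching core of fit_to_validation() above is a plain case-insensitive substring search. A self-contained illustration of how one annotation is produced (text, entity, and tag are made-up placeholders):

text = 'aphids damage the leaves'
entity = 'aphids'
start = text.find(entity)
if start != -1:  # find() returns -1 when the entity is absent
    annotation = {'tag': 'PLANT_PEST', 'start': start, 'end': start + len(entity)}
    print(annotation)  # {'tag': 'PLANT_PEST', 'start': 0, 'end': 6}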
Example #10
def test_data():
    time0 = time.time()
    data = Data(42)
    time1 = time.time()
    assert data.value == 42
    assert time0 <= data.last_update <= time1
    data.value = 24
    time2 = time.time()
    assert data.value == 24
    assert time1 <= data.last_update <= time2
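The test above pins down the Data class's contract: the constructor must stamp last_update, and every assignment to value must refresh it. A minimal implementation that would pass this test (an assumption; the class under test is not shown in this listing):

import time


class Data:
    def __init__(self, value):
        self._value = value
        self.last_update = time.time()

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value
        self.last_update = time.time()  # refresh the timestamp on every write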
Example #11
 def post():
     token = request.cookies.get('jwt', request.headers.get('Authorization', 'a.b.c'))
     secret_key = current_app.config['SECRET_KEY']
     user_info = jwt.decode(token, secret_key)
     username = user_info['user']
     key = get_key_to_hash('login', username=username)
     data = Data(message='logout success', status=200).to_response()
     data.delete_cookie('jwt')
     sentinel.master.delete(key)
     return data
Example #12
def train_image():
    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[3]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()
    x_train = x[0:1800]
    y_train = y[0:1800]
    x_test = x[1800:2000]
    y_test = y[1800:2000]
    camel_GPU.train_image(dataset, x_train, y_train, x_test, y_test, rho=1, alpha=0.1, alpha_ban=0.5, lam2=0.1)
Example #13
 def get():
     try:
         limit = int(request.args['limit'])
     except BadRequestKeyError:
         limit = 100
     except ValueError:
         data = Data(message='Invalid value of limit', status=422)
         return data.to_response()
     try:
         offset = int(request.args['offset'])
     except BadRequestKeyError:
         offset = 0
     except ValueError:
         data = Data(message='Invalid value of offset', status=422)
         return data.to_response()
     results = Problem.query.order_by(Problem.id).filter_by(visible=True).limit(limit).offset(offset).all()
     problem_list = []
     for item in results:
         problem = {
             "id": item.id,
             "title": item.title,
             "source": item.source,
             "submit_number": item.submit_number,
             "accepted_number": item.accepted_number
         }
         if item.submit_number == 0 or item.submit_number is None:
             problem['ac_rate'] = 0
         else:
             problem['ac_rate'] = item.accepted_number / item.submit_number
         problem_list.append(problem)
     data = Data(data=problem_list, status=200)
     return data.to_response()
Example #14
 def get():
     try:
         limit = int(request.args['limit'])
     except BadRequestKeyError:
         limit = 100
     except ValueError:
         data = Data(message='Invalid value of limit', status=422)
         return data.to_response()
     try:
         offset = int(request.args['offset'])
     except BadRequestKeyError:
         offset = 0
     except ValueError:
         data = Data(message='Invalid value of offset', status=422)
         return data.to_response()
     results = Contest.query.order_by(Contest.id).filter_by(
         visible=True).limit(limit).offset(offset).all()
     contest_list = []
     for item in results:
         contest = {
             "id": item.id,
             "title": item.title,
             "start_time": item.start_time,
             "end_time": item.end_time,
             "type": item.permission_type
         }
         now = int(time.time())
         if contest['end_time'] is None or now > contest['end_time']:
             contest['status'] = 'end'
         else:
             contest['status'] = 'running'
         contest_list.append(contest)
     data = Data(data=contest_list, status=200)
     return data.to_response()
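Examples #13 and #14 duplicate the same try/except parsing for limit and offset. A small helper could factor it out; this is a sketch with a hypothetical name, not part of the original code:

from werkzeug.exceptions import BadRequestKeyError


def parse_int_arg(args, name, default):
    """Return (value, error); error is None on success."""
    try:
        return int(args[name]), None
    except BadRequestKeyError:
        # argument absent: fall back to the default
        return default, None
    except ValueError:
        # argument present but not an integer
        return None, 'Invalid value of %s' % name

Each handler could then call limit, err = parse_int_arg(request.args, 'limit', 100) and return the 422 response whenever err is set.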
Example #15
 def __init__(self):
     self.data = Data()
     self.entities_by_bioconcept = self.data.learn_training_entries()
     self.nlp = spacy.load(self.MODEL)
     self.eppo = Eppo(time_to_sleep=0.1)
     self.taxonomies_by_bioconcept = self.get_taxonomies_by_bioconcept()
     self.accepted_eppo_nouns_path = self.data.dict_dir / self.ACCEPTED_EPPO_NOUNS_FILE_NAME
     self.accepted_eppo_nouns_by_bioconcept = self.data.load_json(self.accepted_eppo_nouns_path)
     self.not_accepted_eppo_nouns_path = self.data.dict_dir / self.NOT_ACCEPTED_EPPO_NOUNS_FILE_NAME
     self.not_accepted_eppo_nouns_by_bioconcept = self.data.load_json(self.not_accepted_eppo_nouns_path)
     self.output_path = self.data.dict_dir / self.OUTPUT_FILE_NAME
Example #16
 def __init__(self):
     # print("Python Version: %s.%s"%(sys.version_info[0],sys.version_info[1]))
     # print("PyTorch Version:%s"%(torch.__version__))
     # print("Process ID: ", os.getpid())
     self.data = Data()
     self.data.HP_gpu = torch.cuda.is_available()
     if self.data.HP_gpu:
         self.data.device = 'cuda'
     # print("GPU:", self.data.HP_gpu, "; device:", self.data.device)
     self.optimizer = None
     self.model = None
Example #17
 def forward(self, x) -> Data:
     x = self.base(x)
     feat_t = self.GAP(x).view(x.size(0), -1)
     feat_c = self.bottleneck(feat_t)  # normalize for angular softmax
     data = Data()
     data.feat_t = feat_t
     data.feat_c = feat_c
     if self.training:
         cls_score = self.classifier(feat_c)
         data.cls_score = cls_score  # global feature for triplet loss
     return data
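Examples #16 and #17 treat Data as a plain attribute bag: created empty, then populated field by field. If that really is all the project's Data does (an assumption; it may carry more logic), Python's standard types.SimpleNamespace behaves the same way:

from types import SimpleNamespace

data = SimpleNamespace()
data.feat_t = [0.1, 0.2]  # attributes can be attached freely, as with Data()
data.cls_score = None
print(data.feat_t)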
Example #18
 def __init__(self, server_mac, port=3):
     """
     Initializer for a Bluetooth connection, either as slave or master.
     :param server_mac: Address of the device to connect to.
     :param port: The port via which to connect to the device.
     """
     self.server_mac = server_mac
     self.port = port
     self.database = Data()
     self.socket = None
     self.out_sock = None
     self.in_sock = None
Example #19
def main():
    args = get_args()
    conf = __import__("config." + args.config, globals(), locals(), ["Conf"]).Conf
    helper = Helper(conf=conf)

    data = Data(conf)
    data.load_data()
    # you need to set up data.train_loader / data.test_loader here

    model = Model(conf).to(conf.device)
    print(model)
    training(conf, model, data.train_loader)
Example #20
def train_val():
    # trade-off para
    rho_list = [1]
    alpha_list = cp.arange(0, 1.1, 0.1)
    alpha_ban_list = cp.arange(0, 1.1, 0.1)
    lam2_list = cp.array([0.001, 0.002, 0.01, 0.02, 0.1, 0.2, 1])

    datasets = ['yeast', 'scene', 'enron', 'image']
    dataset = datasets[2]
    data = Data(dataset, label_type=0)
    x, y = data.load_data()
    camel_GPU.train_val(dataset, x, y, rho_list, alpha_list, alpha_ban_list, lam2_list)
Example #21
def spider() -> Data:
    # Call the requester to fetch the HTML
    # Please read utils.requester in full before continuing
    html = request(index_url)
    if html is None:
        print('Requesting ' + index_url + ' failed.')
        return

    # Build a BeautifulSoup object from the fetched HTML; see the official BeautifulSoup docs for usage
    # Why pass from_encoding='utf-8' here? Look up what UTF-8 is
    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')

    # Read the BeautifulSoup code below alongside the page source of the WHU news site and the BeautifulSoup docs
    div_list = soup.find_all('div', class_='infotitle')

    # Create the Data object; please read utils.data in full before continuing
    data = Data('武大要闻')

    # Iterate over the collected full-text URLs
    for url in [
            base_url + div.a['href'] for div in div_list if div.a is not None
    ]:
        html = request(url)
        if html is None:
            print('Requesting ' + url + ' failed.')
            continue

        soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')

        title = soup.find('div', class_='news_title').string
        attrib = soup.find('div', class_='news_attrib').string
        content = soup.find('div', class_='v_news_content').get_text()

        # Add an entry to data
        # Python's datetime is used here; see the official Python docs for these functions
        # A regular expression is used as well; look up regex basics if they are unfamiliar
        data.add({
            'title':
            title.strip(),
            'date':
            datetime.fromisoformat(
                re.search(r'发布时间:(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2})',
                          attrib).group(1)).isoformat(timespec='seconds'),
            'from':
            re.search(r'来源:(\S*)', attrib).group(1),
            'content':
            content.strip(),
            'source_url':
            url
        })

    return data
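The request() helper used above returns the page HTML or None on failure. Its implementation is not part of this listing; one plausible sketch using the requests library (function body assumed). Returning the raw bytes matters here, because BeautifulSoup's from_encoding argument only takes effect on bytes input:

import requests


def request(url):
    """Fetch url and return its HTML as bytes, or None on any failure (sketch)."""
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        return resp.content
    except requests.RequestException:
        return None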
Example #22
    def __init__(self, max_char_len, emb_size, max_lexicon_words_num,
                 num_units, num_tags, learning_rate):
        self.batch_size = 64
        self.max_char_len = max_char_len
        self.emb_size = emb_size
        self.max_lexicon_words_num = max_lexicon_words_num
        self.num_units = num_units
        self.num_tags = num_tags
        self.learning_rate = learning_rate
        self.optimizer = 'adam'
        self.clip = 5
        self.data = Data()
        self.data.build_word_pretrain_emb(
            'D:\\mygit\\NER_MODEL\\data\\data\\gigaword_chn.all.a2b.uni.ite50.vec'
        )
        self.data.build_gaz_pretrain_emb(
            'D:\\mygit\\NER_MODEL\\data\\data\\ctb.50d.vec')

        def my_filter_callable(tensor):
            # A filter that detects zero-valued scalars.
            return len(tensor.shape) == 0 and tensor == 0.0

        self.sess = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
        self.sess.add_tensor_filter('my_filter', my_filter_callable)

        # note: this plain Session replaces the debug-wrapped session created
        # above, so the zero-scalar tensor filter is effectively disabled
        self.sess = tf.Session()
        self.placeholders = {}
        self.epoch = 0
        self.global_step = tf.Variable(0, trainable=False)

        self.char_ids = tf.placeholder(tf.int32, [None, self.max_char_len])
        self.lexicon_word_ids = tf.placeholder(
            tf.int32, [None, self.max_char_len, self.max_lexicon_words_num])
        self.word_length_tensor = tf.placeholder(
            tf.float32, [None, self.max_char_len, self.max_lexicon_words_num])
        self.labels = tf.placeholder(tf.int32, [None, self.max_char_len])

        self.lexicon_word_ids_reshape = tf.reshape(
            self.lexicon_word_ids,
            [-1, self.max_char_len * self.max_lexicon_words_num])
        self.seq_length = tf.convert_to_tensor(self.batch_size *
                                               [self.max_char_len],
                                               dtype=tf.int32)
        self.placeholders["char_ids"] = self.char_ids
        self.placeholders["lexicon_word_ids"] = self.lexicon_word_ids
        self.placeholders["word_length_tensor"] = self.word_length_tensor
        self.placeholders["labels"] = self.labels
        self.create_embedding()
        self.create_declare()
        self.create_model()
        self.create_loss()
Example #23
 def __init__(self):
     self.data = Data()
     self.file_path = self.data.cwd / self.FILE_NAME
     self.df = self.load_df()
     grouped = self.df.groupby("Sentence #").apply(self.group_function)
     self.sentences = [sentence for sentence in grouped]
     np.random.shuffle(self.sentences)
 def int8(self):
     address = self.tell()
     self.offset += 1
     size = self.offset - address
     # note: 'B' is struct's unsigned 8-bit code, so despite the method
     # name this reads a uint8; a signed int8 would use 'b'
     return Data(
         struct.unpack('B', self.data[address:self.offset])[0], address,
         size)
 def uint16(self):
     address = self.tell()
     self.offset += 2
     size = self.offset - address
     return Data(
         struct.unpack('H', self.data[address:self.offset])[0], address,
         size)
 def single(self):
     address = self.tell()
     self.offset += 4
     size = self.offset - address
     return Data(
         struct.unpack('f', self.data[address:self.offset])[0], address,
         size)
 def double(self):
     address = self.tell()
     self.offset += 8
     size = self.offset - address
     return Data(
         struct.unpack('d', self.data[address:self.offset])[0], address,
         size)
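The four readers above (int8, uint16, single, double) are clearly methods of one binary-reader class that keeps a byte buffer and a cursor, and wraps each decoded value together with its address and size. A minimal skeleton that makes them runnable (class name and the Data record are assumptions):

import struct
from collections import namedtuple

# the decoded value, where it was read from, and how many bytes it occupied
Data = namedtuple('Data', ['value', 'address', 'size'])


class BinaryReader:
    def __init__(self, data: bytes):
        self.data = data
        self.offset = 0

    def tell(self):
        return self.offset

    def uint16(self):
        address = self.tell()
        self.offset += 2
        size = self.offset - address
        return Data(
            struct.unpack('H', self.data[address:self.offset])[0], address,
            size)

# BinaryReader(b'\x10\x00').uint16() -> Data(value=16, address=0, size=2)
# ('H' uses native byte order; the value shown assumes a little-endian machine)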
    def __init__(self, ip_port):
        """

		:param ip_port: ai-nlp-basic ip:port
		"""
        configs = self.read_configs(1)
        dset_path, model_path = configs['alphabet_path'], configs['model_path']
        self.data = Data('', '', False)
        # the AC tree needs to be built at inference time
        self.data.trees = Trees.build_trees(configs['specific_words_file'])
        with open(dset_path, 'rb') as rbf:
            self.data.char_alphabet.instance2index = pickle.load(
                rbf)  # keep_growing: False
            self.data.intent_alphabet.instance2index = pickle.load(rbf)
            self.data.label_alphabet.instance2index = pickle.load(rbf)
            self.data.label_alphabet.instances = pickle.load(rbf)
            self.data.char_alphabet_size = pickle.load(rbf)
            self.data.intent_alphabet_size = pickle.load(rbf)
            self.data.label_alphabet_size = pickle.load(rbf)

        self.model = BilstmCrf(self.data, configs)
        self.model.load_state_dict(
            torch.load(model_path, map_location=configs['map_location']))
        self.model.eval()
        self.model.to(configs['device'])

        self.gpu = configs['gpu']
        self.char_max_length = configs['char_max_length']

        self.channel = grpc.insecure_channel(ip_port)
        self.stub = nlp_basic_pb2_grpc.NLPBasicServerStub(self.channel)
Example #29
    def evaluate(self, save_name=None):
        if save_name is None:
            model = self.model.cuda()
        else:
            save_path = "%s%s.mdl" % (PathConfig.MODEL_PATH, save_name)
            print("Loading model from %s..." % save_path)
            model = torch.load(save_path).cuda()
            print("Loaded.\n")

        # total is rounded up to a whole number of batches; if the test set
        # does not divide evenly, the accuracy denominator is slightly inflated
        total = ceil(len(self.data["test"][0]) /
                     NetworkConfig.BATCH_SIZE) * NetworkConfig.BATCH_SIZE
        correct_count = 0

        bar = ProgressBar(total, "Testing model...")
        progress = 0

        for batch in Data.batches(self.data["test"]):
            X, Y = batch[0], batch[1]
            output = model(X)

            output = numpy.round(output.cpu().detach().numpy() * 5)
            Y = numpy.round(numpy.array(Y) * 5)
            correct_count += numpy.sum(output == Y)

            progress += NetworkConfig.BATCH_SIZE
            bar.refresh(progress)

        bar.finish("Accuracy: %f" % (correct_count / total))
Example #30
def value():
    data_list = []
    data = Data.get_json_data("bnal.json")
    for i in data:
        data_list.append((i.get("name"), i.get("password"), i.get("exp"),
                          i.get("tag"), i.get("desc")))
    return data_list
Example #31
def stability(
    data_filename, skill, cluster_number=3, method="hierarchical_concepts", runs=10, sample_size=0.7, plot=True
):
    results = []

    for i in range(runs):
        data = Data(data_filename, train_seed=i, train_size=sample_size)
        if method == "spectral_items":
            data.only_first()
            results.append(item_clustering(data, skill, cluster_number=cluster_number, plot=False))
        if method == "spectral_concepts":
            results.append(concept_clustering(data, skill, cluster_number=cluster_number, plot=False))
        if method == "hierarchical_concepts":
            results.append(
                hierarchical_clustering(
                    data,
                    skill,
                    concepts=True,
                    cluster_number=cluster_number,
                    corr_as_vectors=False,
                    method="complete",
                    dendrogram=False,
                )
            )
        if method == "hierarchical_concepts_vectors":
            results.append(
                hierarchical_clustering(
                    data,
                    skill,
                    concepts=True,
                    cluster_number=cluster_number,
                    corr_as_vectors=True,
                    method="ward",
                    dendrogram=False,
                )
            )

    values = []
    for l1, l2 in combinations(results, 2):
        values.append(v_measure_score(l1, l2))

    if plot:
        plt.title("{} - {} ".format(method, skill))
        plt.hist(values)
    return sum(values) / len(values)
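stability() estimates how reproducible a clustering is: it reruns the chosen method on a number of random subsamples and averages the pairwise V-measure between the resulting labelings (1.0 means every pair of runs agrees perfectly). A typical call might look like this (file name and skill are placeholders):

score = stability("data/answers.pd", "division", method="hierarchical_concepts", runs=10, plot=False)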
Example #32
 def loadDataFile(self, name):
     """ Charge le fichier de données passé en paramètre. Cette fonction est appelée lors
     de l'ouverture d'un projet existant et lors du choix du fichier de données pour un nouveau projet
     """
     log(2, "Loading datafile '%s'" % name)
     ## DOS-to-Unix conversion
     # if not isUnixText(name):
     #    dos2unix(name)
     new_data = Data(name)
     try:
         new_data.loadfromfile()
         # keep the data only if the load did not raise an exception
         self.data = new_data
         microsat = ""
         sequences = ""
         et = ""
         if self.data.nloc_mic > 0:
             microsat = "%s microsat" % self.data.nloc_mic
         if self.data.nloc_seq > 0:
             sequences = "%s sequences" % self.data.nloc_seq
         if self.data.nloc_mic > 0 and self.data.nloc_seq > 0:
             et = " and "
         self.ui.dataFileInfoLabel.setText(
             "%s loci (%s%s%s)\n%s individuals in %s samples"
             % (self.data.nloc, microsat, et, sequences, self.data.nindtot, self.data.nsample)
         )
         self.ui.dataFileEdit.setText(name)
         self.dataFileSource = name
     except Exception as e:
         log(1, traceback.format_exc())
         keep = ""
         if self.ui.dataFileEdit.text() != "":
             keep = "\n\nThe file was not loaded, nothing was changed"
         output.notify(self, "Data file error", "%s%s" % (e, keep))
         return False
     return True
Example #33
    plt.subplot(221)
    plt.title("Correlations of difficulties")
    sns.heatmap(results_d)
    plt.subplot(222)
    plt.title("Correlations of skills")
    sns.heatmap(results_s)
    plt.subplot(223)
    plt.title("Correlations of predictions")
    sns.heatmap(results_p)
    plt.subplot(224)
    sns.barplot(x="labels", y="rmse", data=df,)


model_flat = lambda label: EloPriorCurrentModel(KC=2, KI=0.5)
model_hierarchical = lambda label: EloHierarchicalModel(KC=1, KI=0.75, alpha=0.8, beta=0.02)
data = Data("../data/matmat/2015-12-16/answers.pd")
data_time_2 = Data("../data/matmat/2015-12-16/answers.pd", response_modification=TimeLimitResponseModificator([(5, 0.5)]))
data_time_2b = Data("../data/matmat/2015-12-16/answers.pd", response_modification=TimeLimitResponseModificator([(7, 0.5)]))

value = 0.5
single_step_time_split = {
    limit: (
        lambda limit: Data("../data/matmat/2015-12-16/answers.pd", response_modification=TimeLimitResponseModificator([(limit, value)])),
        model_flat
    )
    for limit in range(1, 15)
}
# compare_more_models(single_step_time_split)

limit = 5
single_step_time_split_value = {
Example #34
    print ("GPU available:", gpu)
    print ("Status:", status)
    print ("Seg: ", seg)
    print ("Train file:", train_file)
    print ("Dev file:", dev_file)
    print ("Test file:", test_file)
    print ("Raw file:", raw_file)
    print ("Char emb:", char_emb)
    print ("Bichar emb:", bichar_emb)
    print ("Gaz file:",gaz_file)
    if status == 'train':
        print ("Model saved to:", save_model_dir)
    sys.stdout.flush()
    
    if status == 'train':
        data = Data()
        data.HP_gpu = gpu
        data.HP_use_char = False
        data.HP_batch_size = 10
        data.use_bigram = False
        data.gaz_dropout = 0.5
        data.norm_gaz_emb = False
        data.HP_fix_gaz_emb = False
        data_initialization(data, gaz_file, train_file, dev_file, test_file)

        data.generate_instance_with_gaz(train_file,'train')
        data.generate_instance_with_gaz(dev_file,'dev')
        data.generate_instance_with_gaz(test_file,'test')

        data.build_word_pretrain_emb(char_emb)
        data.build_biword_pretrain_emb(bichar_emb)
Example #35
        students["AB group"] = ""
        for i, name in ((14, "50%"), (15, "35%"), (16, "20%"), (17, "5%"), ):
            students["AB group"][groups == i] = name
        g = sns.PairGrid(students, hue="AB group", hue_order=["5%", "20%", "35%", "50%", ])
        g = g.map_diag(sns.distplot)
        g = g.map_upper(plt.scatter)
        g = g.map_lower(sns.kdeplot)
        g = g.add_legend()
    else:
        g = sns.PairGrid(students)
        g = g.map_diag(plt.hist)
        g = g.map_upper(plt.scatter, marker=".")
        g = g.map_lower(sns.kdeplot, shade=False)


data = Data("../data/matmat/2016-06-27/answers.pd")
# data = Data("../data/slepemapy/2016-ab-target-difficulty/answers.pd")
data.filter_data(10, 10)

# response_times(data, time_dist=True, mean_times_dist=False)
# answer_count(data, per_student=False, per_item=True, student_drop_off=False)
# success_rate(data, per_student=False)
# print(data.get_items_df().count())

pair_grid(data)
plt.show()


if False:
    model = EloPriorCurrentModel(KC=2, KI=0.5)
    # model = EloHierarchicalModel(KC=1, KI=0.75, alpha=0.8, beta=0.02)
Example #36
        ["Basic model + noTime", "Basic model + thresholdTme", "Basic model + expTime", "Basic model + linearTime"],
        10, runs=5,
        # eval_data=data_test
    ).to_pickle("Diff_times_H.pd")

if False:
    df = pd.read_pickle("Diff_times.pd")
    # df = pd.read_pickle("Diff_models.pd")
    df["answers"] = ((df["answers"] / 15000).round() * 15).astype(int)
    print(df)
    # g = sns.factorplot(x="answers", y="correlation", hue="models", hue_order=['Basic model', 'Item Average', 'Concept model', 'Hierarchical model'], data=df, markers=["o", "^", "v", "s", "D"])
    g = sns.factorplot(x="answers", y="correlation", hue="models", data=df, markers=["o", "^", "v", "s", "D"])

    g.set_xlabels("Thousands of answers in the train set")
    g.set_ylabels("Correlation of parameters")
    g.set(ylim=(0,1))

if False:
    data = Data(filename, train_size=0.7)
    data.get_dataframe_train().to_pickle(filename.replace(".pd", ".train.pd"))
    data.get_dataframe_test().to_pickle(filename.replace(".pd", ".test.pd"))

if False:
    d = data(None)
    df = d.get_dataframe_all()
    items = d.get_items_df()

    df.to_pickle(filename.replace(".pd", ".less_items.pd"))

plt.show()
Example #37
    if data.seg:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"%(name, time_cost, speed, acc))
    return pred_results, pred_scores




if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Tuning with NCRF++')
    # parser.add_argument('--status', choices=['train', 'decode'], help='update algorithm', default='train')
    parser.add_argument('--config',  help='Configuration File' )

    args = parser.parse_args()
    data = Data()
    data.HP_gpu = torch.cuda.is_available()
    data.read_config(args.config)
    status = data.status.lower()
    print("Seed num:",seed_num)

    if status == 'train':
        print("MODEL: train")
        data_initialization(data)
        data.generate_instance('train')
        data.generate_instance('dev')
        data.generate_instance('test')
        data.build_pretrain_emb()
        train(data)
    elif status == 'decode':
        print("MODEL: decode")
from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

TRASHOLD = 25

d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()
items = items.join(skills, on="skill_lvl_1")

concepts = items["name"].unique()
sts = {}

for concept in concepts:
    print(concept)
    its = list(items[items["name"] == concept].index)
    students = answers[answers["item"].isin(its)].groupby("student").size()
    students = students[students >= TRASHOLD]
    print(len(students))
    sts[concept] = students

data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)

for concept1 in concepts:
    for concept2 in concepts:
        count = len(set(sts[concept1]) & set(sts[concept2]))
        print(concept1, concept2, count)
        data[concept1][concept2] = count
Example #39
from matmat.experiments_clustering import all_in_one
from models.eloPriorCurrent import EloPriorCurrentModel
from utils import utils
from utils.data import Data
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

from utils.evaluator import Evaluator

data = Data("../../data/matmat/2016-06-27/answers.pd", train_size=1)
data.trim_times()
answers = data.get_dataframe_all()

def grid(data, model):
    utils.grid_search(data, model,
          {"KC": 3, "KI": 0.5}, {
          # {"alpha": 0.25, "beta": 0.02}, {
              "alpha": np.arange(0.4, 1.7, 0.2),
              "beta": np.arange(0., 0.2, 0.02),
              # "KC": np.arange(1.5, 5.0, 0.25),
              # "KI": np.arange(0, 2.5, 0.25),
          # }, plot_axes=["KC", "KI"])
        }, plot_axes=["alpha", "beta"])

    plt.show()


items = data.get_items_df()
items = items[(items["skill_lvl_2"] == 210) & ~items["skill_lvl_3"].isnull()].loc[:, ("question", "answer", "visualization")]
Example #40
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Tuning with NCRF++')
    parser.add_argument('--wordemb',  help='Embedding for words', default='None')
    parser.add_argument('--charemb',  help='Embedding for chars', default='None')
    parser.add_argument('--status', choices=['train', 'decode'], help='update algorithm', default='train')
    parser.add_argument('--savemodel', default="data/model/saved_model.lstmcrf.")
    parser.add_argument('--savedset', help='Dir of saved data setting')
    parser.add_argument('--train', default="data/conll03/train.bmes") 
    parser.add_argument('--dev', default="data/conll03/dev.bmes" )  
    parser.add_argument('--test', default="data/conll03/test.bmes") 
    parser.add_argument('--seg', default="True") 
    parser.add_argument('--raw') 
    parser.add_argument('--loadmodel')
    parser.add_argument('--output') 
    args = parser.parse_args()
    data = Data()
    
    data.train_dir = args.train 
    data.dev_dir = args.dev 
    data.test_dir = args.test
    data.model_dir = args.savemodel
    data.dset_dir = args.savedset
    print("aaa",data.dset_dir)
    status = args.status.lower()
    save_model_dir = args.savemodel
    data.HP_gpu = torch.cuda.is_available()
    print("Seed num:",seed_num)
    data.number_normalized = True
    data.word_emb_dir = "../data/glove.6B.100d.txt"
    
    if status == 'train':
Example #41
            data,
            lambda l: Data(filename, response_modification=TimeLimitResponseModificator([(7, 0.5)])),
            lambda l: Data(filename, response_modification=ExpDrop(5, 0.9)),
            lambda l: Data(filename, response_modification=LinearDrop(14)),
        ],
        [basic_model, basic_model, basic_model, basic_model],
        ["Basic model + noTime", "Basic model + thresholdTme", "Basic model + expTime", "Basic model + linearTime"],
        10, runs=5, data_ratio=ratio,
        # eval_data=data_test
    )

if 1:
    ratio = 1
    model1 = basic_model(None)
    model2 = basic_model(None)
    data1 = Data(filename, train_size=ratio)
    median = data1.get_dataframe_all()['response_time'].median()
    print('time median', median)
    data2 = Data(filename, response_modification=LinearDrop(median * 2), train_size=ratio)
    # data2 = Data(filename, response_modification=TimeLimitResponseModificator([(median, 0.5)]), train_size=ratio)
    # data2 = Data(filename, response_modification=ExpDrop(median / 2, 0.9), train_size=ratio)

    Runner(data1, model1).run(force=True, only_train=True)
    Runner(data2, model2).run(force=True, only_train=True)

    items_ids = data1.get_items()
    items_ids = list(items_in_concept(data(None), 'division'))

    v1 = model1.get_difficulties(items_ids)
    v2 = model2.get_difficulties(items_ids)
    for item, x, y in zip(items_ids, v1, v2):
Example #42
 def get_requirement_data(self):
     """ call here the utils module for yaml loading """
     data = Data(self.filename)
     return data.get_all_value()
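Example #42's docstring says the Data class does the YAML loading. A minimal sketch of such a wrapper (shape assumed, using PyYAML):

import yaml


class Data:
    def __init__(self, filename):
        self.filename = filename

    def get_all_value(self):
        # parse the whole YAML document into native Python objects
        with open(self.filename) as f:
            return yaml.safe_load(f)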