Example #1
    def post(self):
        res = {}
        try:
            # get all sim words
            base_word = request.json['base_word']
            print('recv base word:', base_word)
            sim_list = []
            for item in word_model.most_similar(base_word):
                sim_list.append(item[0])
            res['sim'] = sim_list

            # get all important words
            class_name = request.json['class_name']
            print('recv classname:', class_name)
            important_list = []
            for classname in class_name:
                remap_name = class_name_remap[classname]
                word_df = word_importance_model.loc[remap_name]
                important_list += list(flatten(word_df[[2]].head(20).values.tolist()))
            res['important'] = important_list

            # get all industry words
            industry_list = []
            for classname in class_name:
                value_name = label2value[classname]
                industry_df = industry_word_model.loc[
                    industry_word_model.classname.str.contains(value_name)]
                industry_list += list(flatten(industry_df[[1]].head(20).values.tolist()))
            res['industry'] = industry_list

        except Exception as e:
            print(e)

        print("res words:", res)
        return res
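The handlers above rely on a module-level flatten helper to collapse the nested lists that DataFrame.values.tolist() returns. The project's own helper is not shown here; a minimal sketch of the behaviour these calls appear to assume:

from itertools import chain

def flatten(nested):
    # Collapse one level of nesting: [['a'], ['b'], ['c']] -> ['a', 'b', 'c'].
    return chain.from_iterable(nested)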
Example #2
    def predict(self, sent, pipeline=False):
        self.encode(sent)

        if self.full:
            constraints = [] if self.args.no_lin_constraint else self.get_tree_constraints(
                sent)
            res = self.decode(sent[self.pred_input_key], constraints)
            sent['linearized_tokens'] = res['seq']
        else:
            for token in traverse_bottomup(sent.root):
                domain = (
                    [token] +
                    token['pdeps']) if self.args.pred_tree else token['domain']
                if len(domain) > 1:
                    constraints = [] if self.args.no_lin_constraint else self.get_subtree_constraints(
                        token)
                    res = self.decode(domain, constraints)
                    token['linearized_domain'] = res['seq']
                    # add predicted sequential information
                    f_vec = self.f_lstm.initial_state().transduce(
                        [t.vecs[self.vec_key] for t in res['seq']])[-1]
                    b_vec = self.b_lstm.initial_state().transduce(
                        [t.vecs[self.vec_key] for t in res['seq'][::-1]])[-1]
                    token.vecs[self.vec_key] += (f_vec + b_vec)
                else:
                    token['linearized_domain'] = [token]

            sent['linearized_tokens'] = flatten(token, 'linearized_domain')
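In this codebase flatten is called with a token and an attribute key rather than a nested list, so it is presumably a tree-walking variant that expands each head's 'linearized_domain' in order. A hypothetical sketch of such a helper (the project's actual implementation may differ):

def flatten(token, key):
    # Hypothetical: token[key] lists the head itself plus its ordered
    # dependents; each dependent's own domain is expanded recursively.
    seq = []
    for t in token[key]:
        if t is token:
            seq.append(t)
        else:
            seq.extend(flatten(t, key))
    return seq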
Example #3
    def post(self):
        class_name = request.json['classname']
        print('recv class_name:', class_name)
        class_name = class_name_remap[class_name]
        word_df = word_importance_model.loc[class_name]
        word_list = list(flatten(word_df[[2]].head(20).values.tolist()))

        return word_list
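For context, this handler only reads a 'classname' field from the request body. A hypothetical client call (the endpoint path and class label are illustrative, not taken from the project):

import requests

resp = requests.post('http://localhost:5000/important_words',
                     json={'classname': 'finance'})
print(resp.json())  # top-20 important words for the mapped class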
Example #4
    def post(self):
        class_name = request.json['classname']
        print('recv class_name:', class_name)
        class_name = class_name_remap[class_name]
        word_df = word_importance_model.loc[class_name]
        word_list = list(flatten(word_df[[2]].head(20).values.tolist()))

        return word_list
Example #5
    def post(self):
        class_name = request.json['classname']
        print('recv class_name:', class_name)
        class_name = label2value[class_name]
        #industy_word_path = "../data/words/word_{name}".format(name=class_name)
        industry_df = industry_word_model.loc[
            industry_word_model.classname.str.contains(class_name)]

        word_list = list(flatten(industry_df[[1]].head(20).values.tolist()))

        return word_list
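These industry-word handlers assume lookup tables and dataframes loaded at module level. A hypothetical illustration of the shapes the code expects (the names come from the snippet, the contents are invented):

import pandas as pd

label2value = {'Finance': 'finance', 'Sports': 'sports'}  # request label -> classname fragment
industry_word_model = pd.DataFrame({
    'classname': ['word_finance', 'word_finance', 'word_sports'],
    1: ['stocks', 'bonds', 'players'],  # column labeled 1 holds one word per row
})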
Example #6
    def post(self):
        class_name = request.json['classname']
        print('recv class_name:', class_name)
        class_name = label2value[class_name]
        #industy_word_path = "../data/words/word_{name}".format(name=class_name)
        industry_df = industry_word_model.loc[
            industry_word_model.classname.str.contains(class_name)]

        word_list = list(flatten(industry_df[[1]].head(20).values.tolist()))

        return word_list
Example #7
    def post(self):
        res = {}
        try:
            # get all sim words
            base_word = request.json['base_word']
            print('recv base word:', base_word)
            sim_list = []
            for item in word_model.most_similar(base_word):
                sim_list.append(item[0])
            res['sim'] = sim_list

            # get all important words
            class_name = request.json['class_name']
            print('recv classname:', class_name)
            important_list = []
            for classname in class_name:
                remap_name = class_name_remap[classname]
                word_df = word_importance_model.loc[remap_name]
                important_list += list(
                    flatten(word_df[[2]].head(20).values.tolist()))
            res['important'] = important_list

            # get all industry words
            industry_list = []
            for classname in class_name:
                value_name = label2value[classname]
                industry_df = industry_word_model.loc[
                    industry_word_model.classname.str.contains(value_name)]
                industry_list += list(
                    flatten(industry_df[[1]].head(20).values.tolist()))
            res['industry'] = industry_list

        except Exception as e:
            print(e)

        print("res words:", res)
        return res
Example #8
    def train_one_step(self, sent):
        total = correct = loss = 0
        t0 = time()

        self.encode(sent)

        if self.full:
            constraints = [] if self.args.no_lin_constraint else self.get_tree_constraints(
                sent)
            res = self.decode(sent[self.train_input_key], constraints, True)
            loss = res['loss']
            total += 1
            sent['linearized_tokens'] = res['seq']
            correct += int(
                sent['linearized_tokens'] == sent['gold_linearized_tokens'])
        else:
            for token in traverse_bottomup(sent.root):
                domain = (
                    [token] +
                    token['pdeps']) if self.args.pred_tree else token['domain']
                if len(domain) > 1:
                    constraints = [] if self.args.no_lin_constraint else self.get_subtree_constraints(
                        token)
                    res = self.decode(domain, constraints, True)
                    token['linearized_domain'] = res['seq']
                    loss += res['loss']
                    total += 1
                    correct += int(token['linearized_domain'] ==
                                   token['gold_linearized_domain'])
                    # add predicted sequential information
                    f_vec = self.f_lstm.initial_state().transduce(
                        [t.vecs[self.vec_key] for t in res['seq']])[-1]
                    b_vec = self.b_lstm.initial_state().transduce(
                        [t.vecs[self.vec_key] for t in res['seq'][::-1]])[-1]
                    token.vecs[self.vec_key] += (f_vec + b_vec)
                else:
                    token['linearized_domain'] = [token]

            sent['linearized_tokens'] = flatten(token, 'linearized_domain')

        loss_value = loss.value() if loss else 0

        return {
            'time': time() - t0,
            'loss': loss_value,
            'loss_expr': loss,
            'total': total,
            'correct': correct
        }
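train_one_step returns the loss both as a scalar and as a live expression ('loss_expr'), which suggests the caller drives the backward pass. A hypothetical training loop, assuming a DyNet-style setup (model, trainer, and training_sents are illustrative names, not the project's API):

total = correct = 0
for sent in training_sents:
    stats = model.train_one_step(sent)
    if stats['loss_expr']:
        stats['loss_expr'].backward()  # backprop through the returned expression
        trainer.update()               # apply the accumulated gradients
    total += stats['total']
    correct += stats['correct']
print('exact-match accuracy:', correct / max(total, 1))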
Example #9
    def post(self):
        res = {}
        try:
            # get all important words
            base_word = request.json['base_word']
            class_name = request.json['class_name']
            print('recv classname:', class_name)
            important_list = []
            for classname in class_name:
                remap_name = class_name_remap[classname]
                word_df = word_importance_model.loc[remap_name]
                important_list += list(flatten(word_df[[2]].head(20).values.tolist()))
            res['important'] = important_list

        except Exception as e:
            print(e)

        print("Important words:", res)
        return res
Example #10
    def post(self):
        res = {}
        try:
            # get all industry words
            base_word = request.json['base_word']
            class_name = request.json['class_name']
            industry_list = []
            for classname in class_name:
                value_name = label2value[classname]
                industry_df = industry_word_model.loc[
                    industry_word_model.classname.str.contains(value_name)]
                industry_list += list(flatten(industry_df[[1]].head(20).values.tolist()))
            res['industry'] = industry_list

        except Exception as e:
            print(e)

        print("Industry words:", res)
        return res
Example #11
    def post(self):
        res = {}
        try:
            # get all important words
            base_word = request.json['base_word']
            class_name = request.json['class_name']
            print('recv classname:', class_name)
            important_list = []
            for classname in class_name:
                remap_name = class_name_remap[classname]
                word_df = word_importance_model.loc[remap_name]
                important_list += list(
                    flatten(word_df[[2]].head(20).values.tolist()))
            res['important'] = important_list

        except Exception as e:
            print(e)

        print("Important words:", res)
        return res
Example #12
    def post(self):
        res = {}
        try:
            # get all industry words
            base_word = request.json['base_word']
            class_name = request.json['class_name']
            industry_list = []
            for classname in class_name:
                value_name = label2value[classname]
                industry_df = industry_word_model.loc[
                    industry_word_model.classname.str.contains(value_name)]
                industry_list += list(
                    flatten(industry_df[[1]].head(20).values.tolist()))
            res['industry'] = industry_list

        except Exception as e:
            print(e)

        print("Industry words:", res)
        return res
Example #13
def get_tx_events(nodes):
    run('mkdir ./logs/')
    run('rm -rf ./logs/*_txs')
    all_events = pmap(get_tx_events_single_node, nodes)
    return sorted(data.flatten(all_events))
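get_tx_events (here and in Examples #14 and #15 below) depends on a parallel map helper and on data.flatten. A hypothetical stand-in for both (the names follow the snippet; the project's own implementations may differ):

from itertools import chain
from multiprocessing.pool import ThreadPool

def pmap(fn, items, workers=8):
    # Apply fn to every item concurrently, preserving input order.
    with ThreadPool(workers) as pool:
        return pool.map(fn, items)

def flatten(list_of_lists):
    # One level of flattening: [[a, b], [c]] -> [a, b, c].
    return list(chain.from_iterable(list_of_lists))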
Example #14
channels = range(0, 5)  # DO NOT CHANGE
surrounding = 250

labels = [
    0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 2,
    1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 2, 0, 0, 0, 1,
    0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 2, 2, 0, 0, 2, 0, 2, 2, 1, 1, 2, 2, 0, 1,
    1, 2, 1, 2, 1, 0, 0, 0, 2, 0, 1, 2, 2, 0, 0, 1, 0, 2, 1, 2, 2, 1, 2, 2, 1,
    0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2, 2, 0, 0, 1, 0, 2, 0, 2, 2, 0, 2, 0, 1, 0,
    1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 2, 0, 0, 2, 1, 2, 1, 2, 2, 1, 2, 0
]

sequence_groups = transform_data([
    data.flatten(
        data.process_scrambled(labels, ['colors2.txt'],
                               channels=channels,
                               sample_rate=250,
                               surrounding=surrounding,
                               exclude=set([71, 155, 317])))
])

#silence_group = transform_data(data.process(1, ['math6_silence.txt'], channels=channels, sample_rate=250, surrounding=surrounding))
silence_group = transform_data(
    data.process_silence_between(['colors2.txt'],
                                 channels=channels,
                                 sample_rate=250,
                                 surrounding=surrounding))

sequence_groups = np.array(list(sequence_groups) + list(silence_group))

print(len(sequence_groups))
print(list(map(len, sequence_groups)))
Example #15
def get_tx_events(nodes, tx_filename):
    run('mkdir ./logs/')
    run('rm -rf ./logs/*_txs')
    all_events = pmap(
        lambda node: get_tx_events_single_node(node, tx_filename), nodes)
    return sorted(data.flatten(all_events))