Example #1
def init(params, metric, output_path, draw):
    # load graph structure
    def load_data(params):
        params["network_file"] = os.path.join(DATA_PATH,
                                              params["network_file"])
        G = getattr(dh, params["func"])(params)
        return G

    time_path = output_path + "_time"

    G = load_data(params["load_data"])

    module_embedding = __import__("init_embedding." +
                                  params["init_train"]["func"],
                                  fromlist=["init_embedding"]).NodeEmbedding
    ne = module_embedding(params["init_train"], G)
    print("after module_embedding")
    st = datetime.datetime.now()
    embeddings, weights = ne.train()
    ed = datetime.datetime.now()
    dh.append_to_file(time_path, str(ed - st) + "\n")
    with open(output_path + "_init", "w") as f:
        f.write(
            json.dumps({
                "embeddings": embeddings.tolist(),
                "weights": weights.tolist()
            }))
    metric(embeddings)
    draw(embeddings)
    return G, embeddings, weights
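A note on the pattern above: __import__ with a non-empty fromlist returns the leaf submodule rather than the top-level package, which is what lets module_embedding resolve directly to the NodeEmbedding class. A minimal, self-contained sketch of the same mechanism using only the standard library (the module names below are illustrative, not from this project):

# With a non-empty fromlist, __import__ returns the leaf module,
# so attributes can be read off it directly.
mod = __import__("os.path", fromlist=["path"])
print(mod.join("a", "b"))  # 'a/b' on POSIX

# importlib.import_module is the modern, clearer equivalent:
import importlib
mod = importlib.import_module("os.path")
print(mod.basename("/tmp/x"))  # 'x'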
Example #2
def multilabel_classification(X, params):
    X_scaled = scale(X)
    n = len(X)
    if "label_mode" in params:
        y = dh.load_multilabel_ground_truth(params["ground_truth"], n, params["label_mode"])
    else:
        y = dh.load_multilabel_ground_truth(params["ground_truth"], n)

    print(y.shape)
    print(X.shape)
    y = y[:n]
    acc = 0.0
    micro_f1 = 0.0
    macro_f1 = 0.0
    for _ in range(params["times"]):
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=params["test_size"])
        clf = getattr(mll, params["model"]["func"])(X_train, y_train, params["model"])
        ret = mll.infer(clf, X_test, y_test)
        acc += ret[1]
        y_score = ret[0]
        micro_f1 += f1_score(y_test, y_score, average='micro')
        macro_f1 += f1_score(y_test, y_score, average='macro')

    acc /= float(params["times"])
    micro_f1 /= float(params["times"])
    macro_f1 /= float(params["times"])
    return {"acc" : acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
Example #3
File: main.py Project: lundu28/GNE
def extract_tree(params):
    g = dh.load_graph(os.path.join(DATA_PATH, params["network_file"]))
    g_mat = dh.transfer_to_matrix(g)
    eh = __import__('extract_hierarchy.' + params["extract_hierarchy_model"]["func"],
                    fromlist=["extract_hierarchy"])
    tree = eh.extract_hierarchy(g, params["extract_hierarchy_model"])

    return g_mat, tree
Example #4
def metric(params, info, pre_res, **kwargs):
    res = params_handler(params, info, pre_res)

    # load node number
    node_path = os.path.join(DATA_PATH, params["data"], "node.txt")
    with open(node_path, 'r') as node_file:
        node_num = len(node_file.readlines())

    # load embeddings (the loader handles both txt and binary file types,
    # so the branches were identical and are collapsed here)
    embedding_path = os.path.join(DATA_PATH, "experiment", params["embeddings_file"])
    X = dh.load_embedding(embedding_path, params["file_type"], node_num)

    # results include: accuracy, micro f1, macro f1
    metric_res = classification(X, params)

    # insert into res
    for k, v in metric_res.items():
        res[k] = v

    return res
Example #5
def metric(params, info, pre_res, **kwargs):
    res = params_handler(params, info)

    mus, row2name = dh.load_dict(os.path.join(params["res_home"], "mus.dat"))
    sigs, row2name = dh.load_dict(os.path.join(params["res_home"], "sigs.dat"))
    std_sigs = np.sqrt(sigs)

    assert len(mus) > 0, "The mus file has no data"

    N = len(mus)
    M = len(mus[0])

    # sigs is spherical or diagonal
    if len(std_sigs[0]) == 1:
        ones = np.ones_like(mus)
        tmp = std_sigs.reshape(N, 1)
        std_sigs = ones * tmp

    # dimension reduction
    if M > 2:
        mus, std_sigs = ct.reduce_dist_dim(mus, std_sigs, 2)

    res["ellipse_path"] = os.path.join(params["res_home"], "dist_ellipse.pdf")

    dg.draw_ellipse(mus, std_sigs, row2name, res["ellipse_path"],
                    params["timesOfSigma"])

    return res
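The spherical-to-diagonal expansion above is plain numpy broadcasting: the single standard deviation of each Gaussian is copied across all M dimensions. A self-contained illustration:

import numpy as np

mus = np.zeros((3, 2))                      # N=3 Gaussians in M=2 dimensions
std_sigs = np.array([[0.5], [1.0], [2.0]])  # spherical: one std-dev per Gaussian
expanded = np.ones_like(mus) * std_sigs.reshape(3, 1)
print(expanded)                             # each row repeats its std-dev M times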
Example #6
def train_model(params):
    g_mat, tree = extract_tree(params)

    handlers = {}
    handlers["get_network"] = gn(g_mat, params["get_network_hierarchy"])
    handlers["embedding_model"] = __import__(
        'node_embedding.' + params["embedding_model"]["func"],
        fromlist=["node_embedding"]).NodeEmbedding
    handlers["transfer_embeddings"] = __import__(
        'transfer_embeddings.' + params["transfer_embeddings"]["func"],
        fromlist=["transfer_embeddings"]).TransferEmbedding

    res_coordinates = [None] * len(tree)
    res_coordinates[len(tree) - 1] = np.zeros(
        params["embedding_model"]["embedding_size"], dtype=np.float32)
    res_radius = [None] * len(tree)
    res_radius[len(tree) - 1] = float(params["init_radius"])
    dfs(len(tree) - 1, tree, handlers, params, res_radius, res_coordinates)

    res_path = params["train_output"]
    dh.symlink(res_path, os.path.join(RES_PATH, "new_train_res"))
    dh.append_to_file(
        res_path,
        json.dumps({
            "radius": np.array(res_radius).tolist(),
            "coordinates": np.array(res_coordinates).tolist()
        }))

    return res_coordinates, res_radius
Example #7
def main_old():

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--operation',
                        type=str,
                        default="all",
                        help="[all | init | train | metric | draw]")
    parser.add_argument('--conf', type=str, default="default")
    args = parser.parse_args()
    params = dh.load_json_file(os.path.join(CONF_PATH, args.conf + ".json"))

    metric_path_pre = os.path.join(RES_PATH, args.conf)
    if not os.path.exists(metric_path_pre):
        os.mkdir(metric_path_pre)
    output_path = os.path.join(metric_path_pre, dh.get_time_str())
    metric_path = output_path + "_metric"

    def metric(embeddings):
        if "metrics" not in params:
            return
        for metric in params["metrics"]:
            res = getattr(Metric, metric["func"])(embeddings, metric)
            dh.append_to_file(metric_path, str(res) + "\n")
            print(res)

    dh.symlink(metric_path, os.path.join(metric_path_pre, "new_metric"))

    if "drawers" in params:
        draw_path = output_path + "_draw"
        if not os.path.exists(draw_path):
            os.mkdir(draw_path)
    draw_cnt = [0]

    def draw(embeddings):
        if "drawers" not in params:
            return
        for drawer in params['drawers']:
            getattr(Metric, drawer["func"])(embeddings, drawer, draw_path,
                                            draw_cnt[0])
        draw_cnt[0] += 1

    if args.operation == "all":
        G, embeddings, weights = __import__("init." + params["init"]["func"],
                                            fromlist=["init"]).init(
                                                params["init"], metric,
                                                output_path, draw)
        __import__("dynamic_loop." + params["main_loop"]["func"],
                   fromlist=["dynamic_loop"]).loop(params["main_loop"], G,
                                                   embeddings, weights, metric,
                                                   output_path, draw)
    elif args.operation == "init":
        G, embeddings, weights = __import__("init." + params["init"]["func"],
                                            fromlist=["init"]).init(
                                                params["init"], metric,
                                                output_path, draw)
    elif args.operation == "draw":
        pass
    else:
        print "Not Support!"
Example #8
 def metric(embeddings):
     if "metrics" not in params:
         return
     for metric in params["metrics"]:
         res = getattr(Metric, metric["func"])(embeddings, metric)
         dh.append_to_file(metric_path, str(res) + "\n")
         print(res)
Example #9
    def test(self,
             model_path: str,
             comment: str = None,
             num_words: int = NUM_WORDS,
             max_comment_length: int = MAX_COMMENT_LENGTH) -> None:
        """
        It tests entered comments for abusiveness
        """
        def predict(comment: str):
            comment = dh.clean_comment(comment)
            print(
                'Prediction:',
                tc.predict_proba(np.array([comment]),
                                 num_words=num_words,
                                 sequence_length=max_comment_length,
                                 batch_size=1))

        tc = TextClassifier()
        dh = DataHandler()
        print(f'Loading model from {model_path}')
        tc.load(model_path)
        if comment is not None:
            predict(comment)  # predict() already cleans the comment itself
        else:
            try:
                print('Enter a message. '
                      'I will tell you whether it is abusive or not')
                print('To exit, please, press Ctrl+C')
                while True:
                    comment = input('--> ')
                    predict(comment)
            except KeyboardInterrupt:
                return
Example #10
def loop(params, G, embeddings, weights, metric, output_path, draw):
    params["get_next"]["input_file"] = os.path.join(
        DATA_PATH, params["get_next"]["input_file"])
    module_next = __import__("get_next." + params["get_next"]["func"],
                             fromlist=["get_next"]).GetNext
    gn = module_next(params["get_next"])

    params_new = params["new_embedding"]
    module_new_embedding = __import__("init_embedding." + params_new["func"],
                                      fromlist=["new_embedding"]).NodeEmbedding

    def new_embedding(G, init_embeddings, init_weights, n):
        # note: this variant retrains from scratch; the initial embeddings,
        # weights and new-node count are accepted but unused
        ne = module_new_embedding(params_new, G)
        embeddings, weights = ne.train()
        return embeddings, weights

    time_path = output_path + "_time"
    dynamic_embeddings = []
    while True:
        num_new = gn.get_next(G)
        if num_new == 0:
            break
        st = datetime.datetime.now()
        embeddings, weights = new_embedding(G, embeddings, weights, num_new)
        ed = datetime.datetime.now()
        dh.append_to_file(time_path, str(ed - st) + "\n")
        metric(embeddings)
        draw(embeddings)
        dynamic_embeddings.append({
            "embeddings": embeddings.tolist(),
            "weights": weights.tolist()
        })

    with open(output_path + "_dynamic", "w") as f:
        f.write(json.dumps(dynamic_embeddings))
Example #11
 def metric(embeddings):
     if "metrics" not in params:
         return
     for metric in params["metrics"]:
         print("[] Start node classification...")
         res = getattr(Metric, metric["func"])(embeddings, metric)
         dh.append_to_file(metric_path, str(res) + "\n")
         print("[+] Metric: " + str(res))
Example #12
def infer(params, info, pre_res, **kwargs):
    res = params_handler(params, info, pre_res)
    embeds = dh.get_tagonehot(
        os.path.join(info["network_folder"]["name"],
                     info["network_folder"]["mix_edge"]))
    dh.save_as_pickle(embeds, res["entity_embedding_path"])

    return res
Example #13
 def new_embedding(G, init_embeddings, init_weights, u):
     unigrams_in = dh.in_degree_distribution(G)
     unigrams_out = dh.out_degree_distribution(G)
     bs = module_new_batch(G, u, params_new["batch_strategy"])
     ne = module_new_embedding(params_new["embedding_model"],
                               init_embeddings, init_weights, unigrams_in,
                               unigrams_out)
     embeddings, weights = ne.train(bs.get_batch, 1001)
     return embeddings, weights
Example #14
def optimize(params, info, pre_res, **kwargs):
    res = params_handler(params, info, pre_res)

    G_entity = dh.load_entity_as_graph(os.path.join(info["network_folder"]["name"], info["network_folder"]["edge"]), \
            os.path.join(info["network_folder"]["name"], info["network_folder"]["mix_edge"]), \
            os.path.join(info["network_folder"]["name"], info["network_folder"]["entity"]))  # G.node[id]["tags"] = binary lst tag
    G_tag = dh.load_edge_as_graph(params["walk_file"], \
                os.path.join(info["network_folder"]["name"], info["network_folder"]["tag"])) # walk file
    params["embedding_model"]["en_num"] = len(G_entity.nodes())
    params["embedding_model"]["tag_num"] = len(G_tag.nodes())
    info["en_num"] = params["embedding_model"]["en_num"]
    info["tag_num"] = params["embedding_model"]["tag_num"]

    # get features
    gf_handler = __import__("get_features." + params["get_features"]["func"],
                            fromlist=["get_features"])

    if "dim" not in params["get_features"]:
        params["get_features"]["dim"] = params["tag_num"]

    features = gf_handler.get_features(params["get_features"],
                                       info)  # return numpy

    # model init
    print("[+] The embedding model is model.%s" %
          (params["embedding_model"]["func"]))
    info["logger"].info("[+] The embedding model is model.%s" %
                        (params["embedding_model"]["func"]))

    params["embedding_model"]["aggregator"]["feature_num"] = params[
        "get_features"]["dim"]

    model_handler = __import__("model." + params["embedding_model"]["func"],
                               fromlist=["model"])
    model = model_handler.TagConditionedEmbedding(params["embedding_model"],
                                                  features)
    model.build_graph()

    # batch generator
    print("[+] The batch strategy is batch_strategy.%s" %
          (params["batch_strategy"]))
    info["logger"].info("[+] The batch strategy is batch_strategy.%s\n" %
                        (params["batch_strategy"]))
    bs_handler = __import__("batch_strategy." + params["batch_strategy"],
                            fromlist=["batch_strategy"])
    bs = bs_handler.BatchStrategy(G_tag, G_entity, params)

    # train model
    res["model_path"] = model.train(bs.get_batch)

    return res
Example #15
def infer(params, info, pre_res, **kwargs):
    res, G_entity, G_tag, features = params_handler(params, info, pre_res)

    model_handler = __import__("model." + params["embedding_model"]["func"],
                               fromlist=["model"])
    model = model_handler.TagConditionedEmbedding(params["embedding_model"],
                                                  features)
    bs_handler = __import__("batch_strategy." + params["batch_strategy"],
                            fromlist=["batch_strategy"])
    bs = bs_handler.BatchStrategy(G_tag, G_entity, params)
    embeds = model.infer(bs.get_all(), params["embedding_model"]["model_path"])
    dh.save_as_pickle(embeds, res["entity_embedding_path"])

    return res
Example #16
def init(params, metric, output_path, draw):
    params['output_path'] = output_path

    time_path = output_path + "_time"

    start_time = datetime.datetime.now()
    ut.sage_main(params)
    train_time = datetime.datetime.now() - start_time
    print("the train_time is" + str(train_time))
    dh.append_to_file(time_path, str(train_time) + "\n")
    G = None
    embedding = None
    weight = None
    return G, embedding, weight
Example #17
    def get_account_info_from_csv(self, email):
        account_information = {
            'name': None,
            'address': None,
            'city_state_postal_code': None,
            'country': None,
            'phone': None
        }

        # a single DataHandler instance suffices; test_data is assumed stateless
        handler = DataHandler()

        account_information['name'] = "{} {}".format(
            handler.test_data('first_name', email),
            handler.test_data('last_name', email))

        account_information['address'] = str(handler.test_data('address_1', email))

        account_information['city_state_postal_code'] = "{}, {} {}".format(
            handler.test_data('city', email),
            handler.test_data('state', email),
            handler.test_data('postal_code', email))

        account_information['country'] = handler.test_data('country', email)

        account_information['phone'] = str(handler.test_data('mobile_phone', email))

        return account_information
Example #18
def visualization(X, params):
    ground_truth_path = os.path.join(DATA_PATH, params["data"],
                                     params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]

    column = len(X[0])

    if column > 2:
        X = ct.reduce_embedding_dim(X, 2)

    X = scale(X)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title('Scatter Plot')
    plt.xlabel('X')
    plt.ylabel('Y')
    cValue = ct.label2color(y)
    ax.scatter(X[:, 0], X[:, 1], c=cValue, cmap='viridis', marker='s')
    scatter_path = os.path.join(params["res_home"],
                                params["embeddings_file"] + "scatter.pdf")
    plt.savefig(scatter_path)
    plt.show()

    return {"scatter_path": scatter_path}
Example #19
File: main.py Project: aries-M/TCNE
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--conf", type=str, default="toy")
    parser.add_argument("--log",
                        type=int,
                        default=0,
                        help="0 if log print out in screen else 1")
    parser.add_argument("--level",
                        type=str,
                        default="INFO",
                        help="log level = INFO | DEBUG")
    args = parser.parse_args()
    params = dh.load_json(os.path.join(CONF_PATH, args.conf + ".json"))
    info = init(args, params["static_info"], params)
    info["logger"].debug("log level is DEBUG")
    info["logger"].info("init finished! \n %s \n" % (info))

    res = {}
    for module in params["run_modules"]:
        info["logger"].info("run module: %s" % (module["func"]))
        mdl_name = module["func"]
        mdl_params = module["params"]
        mdl = __import__(mdl_name + "." + mdl_params["func"],
                         fromlist=[mdl_name])
        res[mdl_name] = getattr(mdl, mdl_name)(mdl_params,
                                               info=info,
                                               pre_res=res,
                                               mdl_name=mdl_name)
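A hypothetical shape for the JSON config driving the run_modules loop above; the key names (static_info, run_modules, func, params) come from the code, the module names echo Examples #14 and #15, and every value is illustrative only:

import json

conf = {
    "static_info": {},  # handed to init(); its contents are not shown in this example
    "run_modules": [
        # each entry imports <func>.<params["func"]> and calls its <func>() entry point
        {"func": "optimize", "params": {"func": "some_model"}},
        {"func": "metric",   "params": {"func": "classification"}},
    ],
}
print(json.dumps(conf, indent=2))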
Example #20
def init(params, metric, output_path, draw):
    embeddings_path = os.path.join(RES_PATH, params["embeddings_path"])
    dic = dh.load_json_file(embeddings_path)
    embeddings = np.array(dic["embeddings"])
    metric(embeddings)
    draw(embeddings)
    return None, None, None
Example #21
def loop(params, G, embeddings, weights, metric, output_path, draw):
    embeddings_path = os.path.join(RES_PATH, params["embeddings_path"])
    dynamic_embeddings = dh.load_json_file(embeddings_path)
    for items in dynamic_embeddings:
        embeddings = np.array(items["embeddings"])
        metric(embeddings)
        draw(embeddings)
Example #22
 def train(self,
           data_path: str,
           model_path: str = None,
           glove_path: str = r'./data/glove.840B.300d.txt',
           embedding_dim: int = 300,
           num_words: int = NUM_WORDS,
           max_comment_length: int = MAX_COMMENT_LENGTH,
           epochs: int = 10,
           batch_size: int = 512) -> None:
     """
     It trains a model
     """
     print(f'Loading data from {data_path}')
     dh = DataHandler(data_path)
     tc = TextClassifier()
     print('Fitting to data')
     tc.fit(dh.X_train,
            dh.y_train,
            num_words=num_words,
            glove_path=glove_path,
            embedding_dim=embedding_dim,
            sequence_length=max_comment_length,
            validation_data=(dh.X_val, dh.y_val),
            epochs=epochs,
            batch_size=batch_size)
     tc.save(model_path)
     preds = tc.predict_proba(dh.X_test,
                              num_words=num_words,
                              sequence_length=max_comment_length,
                              batch_size=batch_size)
     print('ROC_AUC score for test data:', roc_auc_score(dh.y_test, preds))
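The train and test methods of Examples #22 and #9 appear to belong to one classifier-runner class that these snippets do not show. A hypothetical invocation, with the class name Runner assumed purely for illustration:

# Runner is an assumed name; only the train()/test() signatures are known.
# runner = Runner()
# runner.train(data_path='./data/train.csv', model_path='./models/clf.h5')
# runner.test(model_path='./models/clf.h5', comment='some text to score')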
Example #23
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--conf', type=str, default="default")
    args = parser.parse_args()
    params = dh.load_json_file(
        os.path.join(MULTI_CONF_PATH, args.conf + ".json"))

    out_path = os.path.join(RES_PATH,
                            "multi_res_" + str(int(time.time() * 1000.0)))
    single_params = {k: v for k, v in params.items() if k != "models"}
    for m in params["models"]:
        for it in m.items():
            if it[0] == "traversal":
                continue
            else:
                single_params[it[0]] = it[1]
        if "traversal" not in m:
            tmp = []
        else:
            tmp = [item for item in m["traversal"].items()]
        with open(out_path, "a") as f:
            dfs(tmp, single_params, f)

    try:
        os.symlink(out_path, os.path.join(RES_PATH, "MultiRes"))
    except OSError:
        os.remove(os.path.join(RES_PATH, "MultiRes"))
        os.symlink(out_path, os.path.join(RES_PATH, "MultiRes"))
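The try/except around os.symlink above recreates the MultiRes link when it already exists, but two runs finishing at once can still race between the remove and the second symlink. A common alternative (a sketch, not this project's code) is to build the link under a temporary name and atomically rename it into place:

import os

def force_symlink(target, link_name):
    # os.replace is an atomic rename on POSIX; assumes link_name + ".tmp" is free
    tmp = link_name + ".tmp"
    os.symlink(target, tmp)
    os.replace(tmp, link_name)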
Example #24
def classification(X, params):
    res = {}
    X_scaled = scale(X)
    y = dh.load_ground_truth(params["ground_truth"])
    y = y[:len(X)]
    for i in range(9):
        ts = round(0.1 * (i + 1), 2)  # test sizes 0.1 .. 0.9, without float drift
        acc = 0.0
        micro_f1 = 0.0
        macro_f1 = 0.0
        for _ in range(params["times"]):
            # note: a fixed random_state yields the same split on every repetition
            X_train, X_test, y_train, y_test = train_test_split(
                X_scaled, y, test_size=ts, stratify=y,
                random_state=params["np_seed"])
            clf = getattr(mll, params["model"]["func"])(X_train, y_train, params["model"])
            ret = mll.infer(clf, X_test, y_test)
            acc += ret[1]
            y_score = ret[0]
            micro_f1 += f1_score(y_test, y_score, average='micro')
            macro_f1 += f1_score(y_test, y_score, average='macro')

        acc /= float(params["times"])
        micro_f1 /= float(params["times"])
        macro_f1 /= float(params["times"])
        print("test_size:",ts)
        res["%.2f" % ts] = {"acc" : acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
        print({"acc" : acc, "micro_f1": micro_f1, "macro_f1": macro_f1})
    return res
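classification above sweeps the held-out fraction from 0.1 to 0.9 and keys each averaged result by the formatted test size; a sketch of the returned structure (numbers are placeholders):

res = {
    "0.10": {"acc": 0.91, "micro_f1": 0.90, "macro_f1": 0.88},
    # ... one entry per test size ...
    "0.90": {"acc": 0.74, "micro_f1": 0.73, "macro_f1": 0.69},
}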
Example #25
File: main.py Project: lundu28/GNE
def main():

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--operation', type=str, default="all",
                        help="[all | extract_tree | train | metric]")
    parser.add_argument('--conf', type=str, default="default")
    parser.add_argument('--metric_input', type=str, default="new_train_res")
    parser.add_argument('--train_output', type=str, default=str(int(time.time() * 1000.0)))
    parser.add_argument('--metric_output', type=str, default=str(int(time.time() * 1000.0)))

    args = parser.parse_args()
    params = dh.load_json_file(os.path.join(CONF_PATH, args.conf + ".json"))
    params["metric_input"] = os.path.join(RES_PATH, args.metric_input)
    params["train_output"] = os.path.join(RES_PATH, "train_res_" + args.train_output)
    params["metric_output"] = os.path.join(RES_PATH, "metric_res_" + args.metric_output)


    if args.operation == "all":
        train_model(params)
        metric(params)
    elif args.operation == "extract_tree":
        extract_tree(params)
    elif args.operation == "train":
        train_model(params)
    elif args.operation == "metric":
        metric(params)
    else:
        print "Not Support!"
Example #26
def extract_hierarchy(G, params):
    g, n, m = dh.load_tree(os.path.join(DATA_PATH, params["file_path"]))
    tree = [None] * n
    for u in g:
        tree[u] = Node(u, set(g[u].keys()), set())
    dfs(n - 1, tree)
    return tree
Example #27
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--operation',
                        type=str,
                        default="all",
                        help="[all | train | metric | draw]")
    parser.add_argument('--conf', type=str, default="default")
    parser.add_argument('--iteration', type=int, default=10001)
    parser.add_argument('--model', type=str, default="model_simple")
    args = parser.parse_args()
    params = dh.load_json_file(
        os.path.join(SINGLE_CONF_PATH, args.conf + ".json"))
    params["iteration"] = args.iteration
    params["model"] = args.model

    if args.operation == "all":
        train_model(params)
        metric(params)
    elif args.operation == "train":
        train_model(params)
    elif args.operation == "metric":
        metric(params)
    elif args.operation == "draw":
        pass
    else:
        print "Not Support!"
Example #28
def metric(params):
    G_truth = dh.load_ground_truth(
        os.path.join(DATA_PATH, params["ground_truth_file"]))
    ret = []
    for metric in params["metric_function"]:
        ret.append(getattr(Metric, metric["func"])(G_truth, metric))
    return ret
Example #29
def classification(X, params):
    res = {}
    X_scaled = scale(X)
    ground_truth_path = os.path.join(DATA_PATH, params["data"], params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]
    acc = 0.0
    micro_f1 = 0.0
    macro_f1 = 0.0
    n_train = params["n_train"]
    print("number_of_train_set", n_train)
    for _ in range(params["times"]):
        # use the scaled features (the original sliced the unscaled X here,
        # leaving X_scaled unused)
        X_train, X_test = X_scaled[:n_train, :], X_scaled[n_train:, :]
        y_train, y_test = y[:n_train], y[n_train:]
        clf = getattr(mll, params["model"]["func"])(X_train, y_train, params["model"])
        ret = mll.infer(clf, X_test, y_test)
        acc += ret[1]
        y_score = ret[0]
        micro_f1 += f1_score(y_test, y_score, average='micro')
        macro_f1 += f1_score(y_test, y_score, average='macro')

    acc /= float(params["times"])
    micro_f1 /= float(params["times"])
    macro_f1 /= float(params["times"])
    res = {"acc" : acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
    print({"acc" : acc, "micro_f1": micro_f1, "macro_f1": macro_f1})
    return res
Example #30
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("--conf", type=str, default="lc")

    parser.add_argument("--level", type=str, default="INFO", help="log level = INFO | DEBUG")
    args = parser.parse_args()
    params = dh.load_json(os.path.join(CONF_PATH, args.conf + ".json"))
    info = init(args, params["static_info"], params)
    info["logger"].info("init finished! \n %s \n" %(info))
    info["logger"].debug("log level is DEBUG")

    res = {}
    for module in params["run_modules"]:
        mdl_name = module["func"]
        mdl_params = module["params"]
        print(mdl_name)

        if info["debug_level"] == 'DEBUG':
            pdb.set_trace()


        mdl = __import__(mdl_name + "." + mdl_params["func"], fromlist=[mdl_name])
        res[mdl_name] = getattr(mdl, mdl_name)(mdl_params, info=info, pre_res=res, mdl_name=mdl_name)