def create_graph(self,
                 node_edge_pairs=None,
                 filepath=None,
                 graph_type='digraph'):
    # Avoid a mutable default argument; load from file when no pairs are given.
    if not node_edge_pairs:
        start_time = time.time()
        data_loader = DataLoader(
            filepath=filepath,
            full_file=True,
            cols_to_load=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT'])
        node_edge_pairs = data_loader.load()
        print("Data load from file took {} seconds".format(time.time() -
                                                           start_time))

    start_time = time.time()
    if graph_type == 'digraph':
        G = nx.DiGraph()
    elif graph_type == 'multidigraph':
        G = nx.MultiDiGraph()
    else:
        raise ValueError("Unsupported graph_type: {}".format(graph_type))

    # Initialize loading counter.
    i = 1
    # Could also use 'add_edges_from' here: G.add_edges_from(node_edge_pairs[1:])
    # Skip the first entry (header row) and add one edge per pair.
    for node_from, node_to in node_edge_pairs[1:]:
        # Counter to track loading progress.
        if i % 10000 == 0:
            print(i)
        i += 1
        # Add the edge; endpoint nodes are created implicitly.
        G.add_edge(node_from, node_to)
    end_time = time.time()
    print("Data load into graph took {} seconds.".format(end_time -
                                                         start_time))

    return G
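A minimal usage sketch for the file-loading path of create_graph, assuming it is a method of the NetworkXGraphCreator class shown in the later examples and that time, networkx (as nx) and DataLoader are imported at module level:

# Hypothetical call: build a DiGraph straight from a TSV file.
creator = NetworkXGraphCreator()
G = creator.create_graph(filepath='data/soc-redditHyperlinks-title.tsv',
                         graph_type='digraph')
print(G.number_of_nodes(), G.number_of_edges())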
Example no. 2
def Main():
    # load the data
    data = DataLoader()
    lambdas = [(i + 1) * 2.0 for i in range(32)]
    batch_size = 500
    num_batch = 2
    sigma = 1.0
    noise = 1e-3
    models = []
    keys = data.keys()

    for i in range(len(lambdas)):
        model = GraphLasso(lambdas[i],
                           batch_size=batch_size,
                           max_iter_outer=20,
                           max_iter_inner=20,
                           eps=1e-4)
        models.append(model)

    for i in range(len(models)):
        theta_sum = np.zeros(shape=(len(keys) * len(keys), ))
        list_theta = []

        for each_batch in range(num_batch):
            X_batch, y_batch = data.sample_batch(batch_size)
            # estimate the precision matrix for this batch
            theta = models[i].estimate(X_batch)
            theta_sum = theta_sum + theta.ravel()
            list_theta.append(theta.ravel())
        list_theta = np.stack(list_theta, axis=0)
        # average the flattened precision estimates over the batches
        mean = theta_sum / float(num_batch)
        """
        cov = np.cov(np.transpose(list_theta))
        cov = cov + noise * np.identity(cov.shape[0])
        dist = multivariate_normal(mean=mean, cov=cov)
        surrogate = 0.0
        for b in range(num_batch):
            log_prob = dist.logpdf(list_theta[b])
            surrogate = surrogate + log_prob
        train_error = -2 * surrogate / float(batch_size)
        df = float(len(keys) * (len(keys) - 1)) / 2.0
        AIC = train_error + 2.0 * df * sigma * sigma / float(num_batch)
        print("AIC for lambda {}: {}".format(lambdas[i], AIC))
        """

        adj_matrix = np.reshape(mean, newshape=(len(keys), len(keys)))
        # compute the adjacency matrix: zero out near-zero entries and binarize
        adj_matrix[np.abs(adj_matrix) < 1e-9] = 0
        adj_matrix = np.abs(np.sign(adj_matrix))
        # remove self-loops on the diagonal
        for diag in range(adj_matrix.shape[0]):
            adj_matrix[diag, diag] = 0

        graph = adj_matrix_to_graph(adj_matrix, keys, lambdas[i])
        graph.layout(prog='circo')
        graph.draw("./graphWithLambdaValue{}.png".format(lambdas[i]))
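adj_matrix_to_graph is not shown in this example; since its result is drawn with layout(prog='circo') and draw(), it is presumably a pygraphviz AGraph. A minimal sketch under that assumption (the body is assumed, not the project's implementation):

import numpy as np
import pygraphviz as pgv


def adj_matrix_to_graph(adj_matrix, keys, lam):
    # Hypothetical helper: turn a 0/1 adjacency matrix into an undirected AGraph.
    keys = list(keys)
    graph = pgv.AGraph(directed=False)
    graph.graph_attr['label'] = 'lambda = {}'.format(lam)
    for key in keys:
        graph.add_node(key)
    # One edge per nonzero entry in the strict upper triangle.
    rows, cols = np.nonzero(np.triu(adj_matrix, k=1))
    for r, c in zip(rows, cols):
        graph.add_edge(keys[r], keys[c])
    return graph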
Example no. 3
def multigraph_node_only_title_load_and_pickle():
    title_data_loader = DataLoader(
        'data/soc-redditHyperlinks-title.tsv',
        full_file=True,
        cols_to_load=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT'])
    title_data = title_data_loader.load()

    nx_graph_creator = NetworkXGraphCreator()
    G = nx_graph_creator.create_graph(title_data, graph_type='multidigraph')
    pickle_obj(G, "data_pickle/networkx_multigraph_title.pickle")
Example no. 4
def full_title_load_and_pickle():
    title_data_loader = DataLoader('data/soc-redditHyperlinks-title.tsv',
                                   full_file=True)
    title_data = title_data_loader.load()

    nx_attr_graph_creator = NetworkXAttributeGraphCreator()
    G_attr = nx_attr_graph_creator.create_graph(
        title_data,
        attr_names=['post_id', 'timestamp', 'post_label', 'post_properties'],
        graph_type='multidigraph')
    pickle_obj(G_attr, "data_pickle/networkx_attr_full_title.pickle")
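pickle_obj is used throughout these snippets but never defined; a minimal sketch of what it might look like using the standard pickle module (the function name comes from the snippets, the body is an assumption):

import pickle


def pickle_obj(obj, filepath):
    # Hypothetical implementation: serialize obj to filepath with pickle.
    with open(filepath, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)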
Example no. 5
def main():
    "main function"
    # optional command line args
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', help='train the NN', action='store_true')
    parser.add_argument('--validate',
                        help='validate the NN',
                        action='store_true')
    parser.add_argument('--beamsearch',
                        help='use beam search instead of best path decoding',
                        action='store_true')
    parser.add_argument(
        '--wordbeamsearch',
        help='use word beam search instead of best path decoding',
        action='store_true')
    parser.add_argument('--dump',
                        help='dump output of NN to CSV file(s)',
                        action='store_true')

    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.beamsearch:
        decoderType = DecoderType.BeamSearch
    elif args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    # train or validate on IAM dataset
    if args.train or args.validate:
        # load training data, create TF model
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize,
                            Model.maxTextLen)

        # save characters of model for inference mode
        with open(FilePaths.fnCharList, 'w') as f:
            f.write(''.join(loader.charList))

        # save words contained in dataset into file
        with open(FilePaths.fnCorpus, 'w') as f:
            f.write(' '.join(loader.trainWords + loader.validationWords))

        # execute training or validation
        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=True)
            validate(model, loader)

    # infer text on test image
    else:
        print(open(FilePaths.fnAccuracy).read())
        model = Model(open(FilePaths.fnCharList).read(),
                      decoderType,
                      mustRestore=True,
                      dump=args.dump)
        infer(model, FilePaths.fnInfer)
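DecoderType and FilePaths are referenced in these handwriting-recognition snippets but not defined; a minimal sketch of what they might contain, where every concrete path is an assumption:

class DecoderType:
    # Hypothetical enumeration of the three decoders selected via the CLI flags.
    BestPath = 0
    BeamSearch = 1
    WordBeamSearch = 2


class FilePaths:
    # Hypothetical file locations; adjust to the actual project layout.
    fnCharList = '../model/charList.txt'
    fnAccuracy = '../model/accuracy.txt'
    fnTrain = '../data/'
    fnCorpus = '../data/corpus.txt'
    fnInfer = '../data/test.png'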
Example no. 6
def fit(self, saveModel=True, saveuser_list=True, saveISBN_list=True, rating_num=10,
        ratingsFile="../data/BX-Book-Ratings.csv"):
    """
    Load the data and train the model.
    :param saveModel: whether to save the model locally
    :param saveuser_list: whether to save the user list locally
    :param saveISBN_list: whether to save the ISBN (book) list locally
    :param rating_num: number of rating rows to load
    :param ratingsFile: path to the ratings CSV file
    :return: None
    """
    dataLoader = DataLoader()
    # num: number of rows to load; it determines how long the later data
    # processing and rating prediction will take.
    ratings = dataLoader.getDataFrame(ratingsFile, ";", "utf-8", num=rating_num)
    self.R, self.user_list, self.ISBN_list = dataLoader.processDataFrametoArray(ratings)
    if saveModel:
        np.save("../Model/BookRecommendedModel.npy", self.R)
    if saveuser_list:
        with open("../Model/user_list", "w+", encoding="UTF-8") as f:
            f.write(str(self.user_list))
    if saveISBN_list:
        with open("../Model/ISBN_list", "w+", encoding="UTF-8") as f:
            f.write(str(self.ISBN_list))
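A hypothetical call to this method; BookRecommender is an assumed class name, not part of the snippet:

recommender = BookRecommender()  # assumed recommender class exposing fit()
recommender.fit(saveModel=True, saveuser_list=True, saveISBN_list=True,
                rating_num=10000)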
Example no. 7
def main():

    config = ConfigReader()

    db = FitbitDatabase(config)
    # db.update_heart_rate()
    # db.update_sleep()


    dl = DataLoader(db)
    # hr_data = dl.get_heart_rate_data(start_date, end_date)
    # sleep_data = dl.get_sleep_data(start_date, end_date)
    #
    # print(hr_data)
    # print(sleep_data)

    da = DataAnalyzer(dl)

    mati_sleep_score = da.get_sleep_scores(
        get_date_from_string(MATI_START_DATE), get_date_from_string(MATI_END_DATE))

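get_date_from_string and the MATI_* date constants come from elsewhere in this project; a minimal sketch of the helper under the assumption that the dates are ISO-formatted strings:

from datetime import datetime


def get_date_from_string(date_string):
    # Hypothetical implementation; '%Y-%m-%d' is an assumed format.
    return datetime.strptime(date_string, '%Y-%m-%d').date()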
Example no. 8
def main():
    """ Main function """
    # Optional command line args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train", help="train the neural network", action="store_true")
    parser.add_argument(
        "--validate", help="validate the neural network", action="store_true")
    parser.add_argument(
        "--wordbeamsearch", help="use word beam search instead of best path decoding", action="store_true")
    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    # Train or validate on Cinnamon dataset
    if args.train or args.validate:
        # Load training data, create TF model
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize,
                            Model.imgSize, Model.maxTextLen, load_aug=True)

        # Execute training or validation
        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=False)
            validate(model, loader)

    # Infer text on test image
    else:
        print(open(FilePaths.fnAccuracy).read())
        model = Model(open(FilePaths.fnCharList).read(),
                      decoderType, mustRestore=False)
        infer(model, FilePaths.fnInfer)
Example no. 9
def create_and_pickle_combined_multigraph():
    print("loading body data")
    body_data_loader = DataLoader(
        'data/soc-redditHyperlinks-body.tsv',
        full_file=True,
        cols_to_load=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT'])
    body_data = body_data_loader.load()
    print("body data length:", len(body_data))

    print("loading title data")
    title_data_loader = DataLoader(
        'data/soc-redditHyperlinks-title.tsv',
        full_file=True,
        cols_to_load=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT'])
    title_data = title_data_loader.load()
    print("title data length:", len(title_data))

    print("creating graphs")
    networkx_loader = NetworkXGraphCreator()
    body_graph = networkx_loader.create_graph(node_edge_pairs=body_data,
                                              graph_type='multidigraph')
    print("body graph nodes:", body_graph.number_of_nodes())
    print("body graph edges:", body_graph.number_of_edges())
    title_graph = networkx_loader.create_graph(node_edge_pairs=title_data,
                                               graph_type='multidigraph')
    print("title graph nodes:", title_graph.number_of_nodes())
    print("title graph edges:", title_graph.number_of_edges())

    print("combining graphs")
    combined_graph = combine_graphs(body_graph, title_graph)
    print("combined graph nodes:", combined_graph.number_of_nodes())
    print("combined graph edges:", combined_graph.number_of_edges())

    print("pickling combined graph")
    pickle_obj(combined_graph,
               "data_pickle/networkx_multigraph_combined.pickle")
Example no. 10
def main(path_):
    dl = DataLoader(path_)
    X = dl.clustering()
    clust = Clustering(X)
    clust.process()
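A hypothetical invocation of this entry point; the file path is a placeholder, not a file from the project:

if __name__ == '__main__':
    # Placeholder path; point this at the project's actual dataset.
    main('data/dataset.csv')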
Example no. 11
            'Recuperados': recovery_total
        },
        ignore_index=True)
    return raw_df


def process_town_names_by_region(raw_df: pd.DataFrame) -> dict:
    names = {}
    for region_id in raw_df['id_region'].unique():
        sub_df = raw_df[raw_df['id_region'] == region_id]
        names[region_id] = list(sub_df['nombre_comuna'].unique())
    return names


# getting and processing data
dataLoader = DataLoader()
region_ids = dataLoader.REGION_IDS

# region stats
country_stats = dataLoader.get_country_stats()
last_day, last_df = dataLoader.get_last_day()
reg_latest_accum = process_daily_df(last_df)
reg_stats = dataLoader.get_country_data()
region_opts = [{
    'label': region_ids[region_id],
    'value': region_id
} for region_id in region_ids.keys()]

# town data
town_stats = dataLoader.get_region_data()
town_names = process_town_names_by_region(town_stats[list(
Example no. 12
def train_model(model, train_dir):
    loader = DataLoader(Model.batchSize, Model.imgSize, Model.maxTextLen,
                        train_dir)
    logging.info('Model chars: {chars}.'.format(chars=loader.charList))
    train(model, loader)
Example no. 13
def run(filename):
    "main function"
    # optional command line args

    parser = argparse.ArgumentParser()
    parser.add_argument('--train', help='train the NN', action='store_true')
    parser.add_argument('--validate',
                        help='validate the NN',
                        action='store_true')
    parser.add_argument('--beamsearch',
                        help='use beam search instead of best path decoding',
                        action='store_true')
    parser.add_argument(
        '--wordbeamsearch',
        help='use word beam search instead of best path decoding',
        action='store_true')
    parser.add_argument('--dump',
                        help='dump output of NN to CSV file(s)',
                        action='store_true')

    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.beamsearch:
        decoderType = DecoderType.BeamSearch
    elif args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    # train or validate on IAM dataset
    if args.train or args.validate:
        # load training data, create TF model
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize,
                            Model.maxTextLen)

        # save characters of model for inference mode
        with open(FilePaths.fnCharList, 'w') as f:
            f.write(''.join(loader.charList))

        # save words contained in dataset into file
        with open(FilePaths.fnCorpus, 'w') as f:
            f.write(' '.join(loader.trainWords + loader.validationWords))

        # execute training or validation
        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=True)
            validate(model, loader)

    # infer text on test image
    else:
        index_list = []
        result_list = []
        prob_list = []
        print(open(FilePaths.fnAccuracy).read())
        model = Model(open(FilePaths.fnCharList).read(),
                      decoderType,
                      mustRestore=True,
                      dump=args.dump)

        for dirpath, dirnames, files in os.walk('../output_words/' + filename,
                                                topdown=False):
            for sub_file in sorted(files, key=getint):
                img_path = dirpath + '/' + sub_file
                # print('---------------------------------------------------')
                index_number, _ = str(sub_file).split('.')
                # print("File path: "+img_path)
                try:
                    result, prob = infer(model, img_path)
                except ValueError:
                    print("Value error")
                    continue
                # print(index_number, result, prob)
                index_list.append(index_number)
                result_list.append(result)
                prob_list.append(prob)

        return index_list, result_list, prob_list
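getint, used above as the sort key for the word-image filenames, is not defined in the snippet; a minimal sketch assuming filenames like '12.png' whose numeric prefix gives the word order:

def getint(name):
    # Hypothetical sort key: the integer before the extension, e.g. '12.png' -> 12.
    return int(name.split('.')[0])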
Example no. 14
        help='Text file for summarization (encoding:"utf-8_sig")')
    parser.add_argument("-n",
                        default=3,
                        type=int,
                        help='Numbers of extraction summaries')
    parser.add_argument(
        "-lang",
        default='en',
        type=str,
        help='If the language of the article isn\'t English, it will be '
        'translated automatically by Google')
    parser.add_argument(
        "--super_long",
        action='store_true',
        help='If the length of the article is > 512, this option is needed')

    args = parser.parse_args()

    #    if args.super_long:
    #        sys.stdout.write('\n<Warning: the number of extractions might be slightly altered with the --super_long option>\n')

    # Language initiator
    lf = LangFactory(args.lang)
    translator = None if args.lang in lf.support_lang else TranslatorY()

    data = DataLoader(args.txt_file, args.super_long, args.lang,
                      translator).data
    model = ModelLoader(lf.toolkit.cp, lf.toolkit.opt, args.lang)
    summarizer = Summarizer(data, model, args.n, translator)
Example no. 15
from src.DataLoader import DataLoader
import dash
import dash_table
import pandas as pd

dataLoader = DataLoader()
stats = dataLoader.get_country_stats()
day, df = dataLoader.get_last_day()
region_ids = dataLoader.REGION_IDS


def process_daily_df(raw_df: pd.DataFrame) -> pd.DataFrame:
    raw_df = raw_df.copy(deep=True)
    raw_df.sort_values(by='id_reg', inplace=True)
    raw_df.rename(columns={
        'nombre_reg': 'Región',
        'casos_totales': 'Casos',
        'fallecidos_totales': 'Fallecidos',
        'recuperados_totales': 'Recuperados'
    },
                  inplace=True)
    raw_df.drop(columns=[
        'casos_nuevos', 'fallecidos_nuevos', 'recuperados_nuevos', 'id_reg'
    ],
                inplace=True)
    return raw_df


df = process_daily_df(df)
app = dash.Dash(__name__)
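The snippet stops right after creating the Dash app; a minimal sketch of how the processed daily table could be rendered with dash_table (the layout is an assumption, not the project's actual one):

# Hypothetical layout: render the processed daily table.
app.layout = dash_table.DataTable(
    columns=[{'name': col, 'id': col} for col in df.columns],
    data=df.to_dict('records'),
)

if __name__ == '__main__':
    app.run_server(debug=True)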