Example #1
def main():
    # Loading Parameters
    parser = init_parameters()
    args, _ = parser.parse_known_args()

    # Updating Parameters (cmd > yaml > default)
    args = update_parameters(parser, args)

    # Setting save_dir
    save_dir = get_save_dir(args)
    U.set_logging(save_dir)
    with open('{}/config.yaml'.format(save_dir), 'w') as f:
        yaml.dump(vars(args), f)

    # Processing
    if args.generate_data or args.generate_label:
        g = Generator(args)
        g.start()

    elif args.extract or args.visualization:
        if args.extract:
            p = Processor(args, save_dir)
            p.extract()
        if args.visualization:
            v = Visualizer(args)
            v.start()

    else:
        p = Processor(args, save_dir)
        p.start()
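Example #1 calls an update_parameters helper that is not part of the listing. A minimal sketch of what it could look like, following the cmd > yaml > default priority that Examples #3 and #10 implement inline; the config path and the existence of args.config are assumptions:

import os
import yaml

def update_parameters(parser, args):
    # Assumed config location; Example #1 does not show where configs live.
    config_path = './configs/' + args.config + '.yaml'
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            yaml_arg = yaml.load(f, Loader=yaml.FullLoader)
        # yaml values override the argparse defaults ...
        parser.set_defaults(**yaml_arg)
    # ... and explicit command-line flags win because they are re-parsed
    # after set_defaults (cmd > yaml > default).
    return parser.parse_args()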
Example #2
    def run_naive_bayes_topic(self):
        '''
        This function takes no inputs and returns nothing.
        Function will:
            - Load the corpus to a pandas dataframe.
            - Perform train test split on the dataset.
            - Perform preprocessing on the text and create TF-IDF array of the corpus.
            - Train Naive Bayes model for topic classification on TF-IDF and save as a .pkl file to the models directory.
            - Print performance metrics to the console and save a .png file of Confusion Matrix.
        '''

        print("Running Naive Bayes Classification with TF-IDF")
        twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
        viz = Visualizer()
        nb = Naive_Bayes()
        dfc = DF_Cleaner()
        
        # Topic Classification with Naive Bayes
        y = twitter.pop('Topic')
        X_train, X_test, y_train, y_test = train_test_split(twitter, y, random_state=42)

        train_text = X_train['TweetText'].to_numpy()
        test_text = X_test['TweetText'].to_numpy()

        X_train_counts, X_train_tfidf = nb.compute_tf_and_tfidf(train_text)
        y_pred = nb.classify(X_train_tfidf, y_train, test_text)
        nb.print_metrics(y_test, y_pred)
        nb.pickle_model(filepath_cv='../models/count_vect_companies.pkl', filepath_clf='../models/naive_bayes_companies.pkl')
        viz.plot_confusion_matrix(y_test, y_pred, classes=['apple', 'google', 'microsoft', 'twitter'], \
                                  title='Multinomial Naive Bayes with TF-IDF')
        plt.savefig('../media/confusion_matrix/tfidf_nb_confmat_companies.png')
        plt.close()
        print('\n\n')
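The Naive_Bayes helper used above is not included in the listing. A rough scikit-learn sketch of the methods the example calls; only the method names come from the example, the class layout and internals are assumptions:

import pickle

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB


class Naive_Bayes:
    def compute_tf_and_tfidf(self, train_text, ngram_range=(1, 1)):
        # Bag-of-words counts first, then TF-IDF weighting on top of them
        self.count_vect = CountVectorizer(ngram_range=ngram_range)
        X_counts = self.count_vect.fit_transform(train_text)
        self.tfidf = TfidfTransformer().fit(X_counts)
        return X_counts, self.tfidf.transform(X_counts)

    def classify(self, X_train_tfidf, y_train, test_text):
        # Fit Multinomial Naive Bayes on TF-IDF features, then predict on raw test text
        self.clf = MultinomialNB().fit(X_train_tfidf, y_train)
        X_test_tfidf = self.tfidf.transform(self.count_vect.transform(test_text))
        return self.clf.predict(X_test_tfidf)

    def print_metrics(self, y_test, y_pred):
        print(classification_report(y_test, y_pred))

    def pickle_model(self, filepath_cv, filepath_clf):
        # Persist the fitted vectorizer and classifier for later reuse
        with open(filepath_cv, 'wb') as f:
            pickle.dump(self.count_vect, f)
        with open(filepath_clf, 'wb') as f:
            pickle.dump(self.clf, f)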
Example #3
def main():
    parser = Init_parameters()

    # Update parameters by yaml
    args = parser.parse_args()
    if os.path.exists('/home/aayadi/projet/RA-GCNv22/configs/' + args.config +
                      '.yaml'):
        with open(
                '/home/aayadi/projet/RA-GCNv22/configs/' + args.config +
                '.yaml', 'r') as f:
            yaml_arg = yaml.load(f, Loader=yaml.FullLoader)
            default_arg = vars(args)
            for k in yaml_arg.keys():
                if k not in default_arg.keys():
                    raise ValueError('The parameter {} does not exist'.format(k))
            parser.set_defaults(**yaml_arg)
    else:
        raise ValueError('This config does not exist: {}'.format(args.config))

    # Update parameters by cmd
    args = parser.parse_args()

    # Show parameters
    print('\n************************************************')
    print('The running config is presented as follows:')
    v = vars(args)
    for i in v.keys():
        print('{}: {}'.format(i, v[i]))
    print('************************************************\n')

    # Processing
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, args.gpus)))
    if args.visualization:
        if args.extract:
            p = Processor(args)
            p.extract()

        print('Starting visualizing ...')
        v = Visualizer(args)
        v.show_wrong_sample()
        v.show_important_joints()
        v.show_heatmap()
        v.show_skeleton()
        print('Finish visualizing!')

    else:
        p = Processor(args)
        p.start()
Example #4
    def run_doc2vec_naivebayes(self):
        '''
        This function takes no inputs and returns nothing.
        Function will:
            - Load data to pandas dataframe.
            - Balance the corpus so that there is an equal amount of tweets for each sentiment and drop tweets labeled with irrelevant sentiment.
            - Perform train test split on the dataset.
            - Perform preprocessing on the text and create Doc2Vec array of the corpus.
            - Train Naive Bayes model for sentiment classification on Doc2Vec and save as a .pkl file to the models directory.
            - Print performance metrics to the console and save a .png file of Confusion Matrix.
        '''

        print("Running Naive Bayes Classification with Doc2Vec")
        twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
        dfc = DF_Cleaner()
        viz = Visualizer()

        # Balancing and Train Test Split
        pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
        balanced_df = dfc.balance_df([neg_df, neutral_df], pos_df)
        train, test = train_test_split(balanced_df, test_size=0.3, random_state=42) 

        # Sentiment Classification with Naive Bayes and Doc2Vec
        d2v = My_Doc2Vec()
        test_tagged, train_tagged = d2v.tag_doc(test, train)
        d2v.create_model_and_vocab(train_tagged)
        d2v.train_model(test_tagged, train_tagged)

        y_train, X_train = d2v.vec_for_learning(train_tagged)
        y_test, X_test = d2v.vec_for_learning(test_tagged)

        clf = GaussianNB()
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        
        print('Testing accuracy %s' % accuracy_score(y_test, y_pred))
        print('Testing F1 score: {}'.format(f1_score(y_test, y_pred, average='weighted')))

        d2v.pickle_model(clf, filepath='../models/doc2vec_naive_bayes.pkl')
        viz.plot_confusion_matrix(y_test, y_pred, classes=['positive', 'negative', 'neutral', 'irrelevant'], \
                                  title='Gaussian Naive Bayes with Doc2Vec')
        plt.savefig('../media/confusion_matrix/d2v_nb_confmat.png')
        plt.close()
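My_Doc2Vec wraps gensim in this example. A plausible sketch of the methods being called, assuming gensim's Doc2Vec; the hyperparameters, tokenization, and tagging scheme are assumptions:

import pickle

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.utils import simple_preprocess


class My_Doc2Vec:
    def tag_doc(self, test, train):
        # Tag every tweet with its sentiment label so labels travel with the vectors
        def tag(row):
            return TaggedDocument(simple_preprocess(row['TweetText']), [row['Sentiment']])
        return test.apply(tag, axis=1).tolist(), train.apply(tag, axis=1).tolist()

    def create_model_and_vocab(self, train_tagged):
        self.model = Doc2Vec(vector_size=100, min_count=2, epochs=30)
        self.model.build_vocab(train_tagged)

    def train_model(self, test_tagged, train_tagged):
        # Only the training documents are used to fit the embedding
        self.model.train(train_tagged,
                         total_examples=self.model.corpus_count,
                         epochs=self.model.epochs)

    def vec_for_learning(self, tagged_docs):
        # Infer one fixed-length vector per document; returns (labels, vectors)
        labels, vectors = zip(*[(doc.tags[0], self.model.infer_vector(doc.words))
                                for doc in tagged_docs])
        return list(labels), list(vectors)

    def pickle_model(self, clf, filepath):
        with open(filepath, 'wb') as f:
            pickle.dump(clf, f)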
Example #5
    def run_naive_bayes_sentiment(self):
        '''
        This function takes no inputs and returns nothing.
        Function will:
            - Load data to pandas dataframe.
            - Balance the corpus so that there is an equal amount of tweets for each sentiment and drop tweets labeled with irrelevant sentiment.
            - Perform train test split on the dataset.
            - Perform preprocessing on the text and create TF-IDF array of the corpus.
            - Train Naive Bayes model for sentiment classification on TF-IDF and save as a .pkl file.
            - Print performance metrics to the console and save a .png file of Confusion Matrix.
        '''

        print("Running Naive Bayes Classification with TF-IDF")
        twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
        viz = Visualizer()
        nb = Naive_Bayes()
        dfc = DF_Cleaner()
        
        # Sentiment Classification with Naive Bayes
        pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
        balanced_df = dfc.balance_df([neg_df, neutral_df], pos_df)
        y = balanced_df.pop('Sentiment')
        X_train, X_test, y_train, y_test = train_test_split(balanced_df, y, random_state=42)

        train_text = X_train['TweetText'].to_numpy()
        test_text = X_test['TweetText'].to_numpy()

        X_train_counts, X_train_tfidf = nb.compute_tf_and_tfidf(train_text, ngram_range=(1, 5))
        y_pred = nb.classify(X_train_tfidf, y_train, test_text)

        nb.print_metrics(y_test, y_pred)
        nb.pickle_model(filepath_cv='../models/count_vect_sent.pkl', filepath_clf='../models/naive_bayes_sent.pkl')
        viz.plot_confusion_matrix(y_test, y_pred, classes=['positive', 'negative', 'neutral'], \
                                  title='Multinomial Naive Bayes with TF-IDF')
        plt.savefig('../media/confusion_matrix/tfidf_nb_confmat_sentiment.png')
        plt.close()
        print('\n\n')
Example #6
def visualize_3d(config, event_df, withNN=False, vertex_stats=None):
    cfg_vis = config['visualize']
    assert cfg_vis['mode'] == '3d'

    visualizer_all_tracks = Visualizer(event_df, 'ALL TRACKS')
    visualizer_lost_tracks = Visualizer(event_df, 'LOST TRACKS')
    visualizer_found_tracks = Visualizer(event_df, 'FOUND TRACKS')
    visualizer_all_tracks.add_coord_planes(config['stations_sizes'])
    visualizer_lost_tracks.add_coord_planes(config['stations_sizes'])
    visualizer_found_tracks.add_coord_planes(config['stations_sizes'])
    if withNN:
        event_df_tracks = event_df[event_df.track != -1]
        batch_tracks_hits, batch_track_idx, short_tracks, short_tracks_idxs, \
        short_track_ellipses, lost_tracks, track_lost_last_ellipse = reconstruct_event(event_df, get_nn(config['network']),
                                                                                       6, config['z_stations'], config['stations_sizes'], vertex_stats=vertex_stats)
        #visualizer.init_draw(reco_tracks=batch_track_idx)
        visualizer_lost_tracks.init_draw(reco_tracks=lost_tracks,
                                         draw_all_hits=True)
        for ind, (last_index, ell) in enumerate(track_lost_last_ellipse):
            visualizer_lost_tracks.add_nn_pred(
                last_index, lost_tracks[ind][last_index - 1], ell[:2], ell[2:])

        visualizer_found_tracks.init_draw(reco_tracks=batch_track_idx)
        visualizer_all_tracks.init_draw(draw_all_tracks_from_df=True)
        visualizer_found_tracks.draw(False)
        visualizer_lost_tracks.draw(False)
        visualizer_all_tracks.draw(True)
    else:
        visualizer = Visualizer(event_df, 'ALL TRACKS')
        visualizer.init_draw(draw_all_tracks_from_df=True)
        visualizer.draw()
Example #7
def run_lda(corpus,
            num_topics=4,
            custom_stopwords=False,
            filepath_wc=None,
            make_vis=True,
            filepath_lda=None):
    '''
    Running LDA with Gensim
    '''
    cleaner = Gensim_LDA(corpus)
    viz = Visualizer()

    if custom_stopwords:
        # Using custom stopwords
        cleaner.tokenize_corpus(custom_stopwords=True)
        word_count = cleaner.wc_whole_corpus()
        if filepath_wc is None:
            viz.plot_wc(word_count, filepath='media/tf_custom_sw.png')
        else:
            viz.plot_wc(word_count, filepath=filepath_wc)
    else:
        # Using Gensim Stopwords
        cleaner.tokenize_corpus()
        word_count = cleaner.wc_whole_corpus()
        if filepath_wc is None:
            viz.plot_wc(word_count, filepath='media/tf_whole_corpus.png')
        else:
            viz.plot_wc(word_count, filepath=filepath_wc)

    cleaner.create_bow()
    lda = cleaner.create_lda_model(num_topics=num_topics)
    cleaner.print_top_words(lda)
    cleaner.print_perplexity_coherence(lda)
    if make_vis:
        viz.make_pyLDAvis(lda,
                          cleaner.bow,
                          cleaner.id2word,
                          filepath=filepath_lda)

    return cleaner, lda
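A hypothetical call to run_lda for context; the CSV path, column name, and output file names are assumptions carried over from the other examples:

import pandas as pd

twitter = pd.read_csv('data/full-corpus.csv', encoding='utf-8')
corpus = twitter['TweetText'].to_numpy()
cleaner, lda = run_lda(corpus,
                       num_topics=4,
                       custom_stopwords=True,
                       filepath_wc='media/tf_custom_sw.png',
                       make_vis=True,
                       filepath_lda='media/lda_vis.html')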
Example #8
def run_visualizer(particle_filter):
    # visualizer yes/no logic
    visualizer = Visualizer(particle_filter)
    # start it
    visualizer.start()
Example #9
def test_market():

    # Environment variables
    outdir = '/home/younesz/Desktop/SUM'
    window_state = 32
    open_cost = 3
    time_difference = True
    wavelet_channels = 0
    batch_size = 16
    rootStore = open('../dbloc.txt', 'r').readline().rstrip('\n')

    # Agent specific options
    #agent_opt 	=	{'type': 'DQN', 'acSpace':5, 'lr':1e-3, 'nz':'dummy', 'batch_size':batch_size, 'action_labels':['short100', 'short50', 'neutral', 'long50' ,'long100']}
    agent_opt = {
        'type': 'DDPG',
        'acSpace': 1,
        'lr': [1e-4, 1e-3],
        'nz': 'dummy',
        'batch_size': batch_size,
        'action_labels': ['continuous'],
        'action_conversion': None
    }

    # import market environment
    from src.emulator import Market
    from src.sampler import SinSampler, BTCsampler
    #sampler 	= 	SinSampler('single', 180, 1.5, (20, 40), (49, 50), fld=outdir)
    sampler = BTCsampler(False,
                         wavelet_channels=0,
                         variables=['Close'],
                         fld=path.join(rootStore, 'data', 'BTCsampler',
                                       'db_bitcoin.csv'))
    env = Market(sampler,
                 window_state,
                 open_cost,
                 time_difference=time_difference,
                 wavelet_channels=wavelet_channels,
                 action_range=[-1, 1],
                 action_labels=agent_opt['action_labels'])

    # Set agent
    agent = Agent(agent_opt['type'],
                  window_state,
                  agent_opt['acSpace'],
                  layer_units=[80, 60],
                  noise_process=agent_opt['nz'],
                  outputdir=outdir,
                  learning_rate=agent_opt['lr'],
                  batch_size=agent_opt['batch_size'])
    agent.p_model = agent.model
    fld_save = path.join(
        rootStore, 'results', sampler.title, agent_opt['type'],
        str((env.window_state, sampler.window_episode, batch_size,
             agent_opt['lr'], agent.discount_factor, 0, env.open_cost)))

    # Set visualizer
    from src.visualizer import Visualizer
    visualizer = Visualizer(env.action_labels)

    # Set simulator
    from src.simulators import Simulator
    simulator = Simulator(agent, env, visualizer=visualizer, fld_save=fld_save)
    simulator.agent_opt = agent_opt
    # Train
    simulator.train(200,
                    save_per_episode=1,
                    exploration_decay=0.99,
                    learning_rate=agent_opt['lr'],
                    exploration_min=0.05,
                    print_t=False,
                    exploration_init=0.8)
    # Test
    simulator.test(50, save_per_episode=1, subfld='in-sample testing')
Example #10
def main():
    parser = Init_parameters()

    # Update parameters by yaml
    args = parser.parse_args()
    if os.path.exists('./configs/' + args.config + '.yaml'):
        with open('./configs/' + args.config + '.yaml', 'r') as f:
            yaml_arg = yaml.load(f, Loader=yaml.FullLoader)
            default_arg = vars(args)
            for k in yaml_arg.keys():
                if k not in default_arg.keys():
                    raise ValueError('The parameter {} does not exist'.format(k))
            parser.set_defaults(**yaml_arg)
    else:
        raise ValueError('This config does not exist: {}'.format(args.config))
    # Update parameters by cmd
    args = parser.parse_args()
    # Show parameters
    print('\n************************************************')

    #if type(args.gpus) == int:
    #    n = args.gpus
    #    if n == 4:
    #        args.gpus = [0, 1, 2, 3]
    #    else:
    #        args.gpus = [0]

    print('The running config is presented as follows:')
    print_default_keys = ['config', 'batch_size', 'pretrained', 'model_stream']
    print_eval_keys = [
        'occlusion_part', 'occlusion_time', 'occlusion_block',
        'occlusion_rand', 'jittering_joint', 'jittering_frame', 'sigma'
    ]

    v = vars(args)
    if '-g' in sys.argv or '--gpus' in sys.argv:
        aa = args.gpus
        args.gpus = [int(x) for x in aa.split(',')]
    else:
        if node == 'obama':
            args.gpus = [0, 1, 2, 3]
        elif node == 'puma':
            args.gpus = [0]
        else:
            args.gpus = [0]

    for i in v.keys():
        if i in print_default_keys:
            print('{}: {}'.format(i, v[i]))

    if args.evaluate:
        for i in v.keys():
            if i in print_eval_keys:
                if v[i]:
                    print('{}: {}'.format(i, v[i]))

    print('************************************************\n')
    # Processing
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, args.gpus)))
    if args.visualization:
        if args.extract:
            p = Processor(args)
            p.extract()

        print('Starting visualizing ...')
        v = Visualizer(args)
        v.show_wrong_sample()
        v.show_important_joints()
        v.show_heatmap()
        v.show_skeleton()
        print('Finish visualizing!')

    else:

        if args.baseline:
            p = Processor_BS(args)
        else:
            p = Processor(args)
        p.start()
Example #11
    def __init__(self):
        self.l = logging.getLogger(__name__ + "." + self.__class__.__name__)
        cols, rows = shutil.get_terminal_size((100, 50))
        os.environ['COLUMNS'] = str(cols)

        ap = argparse.ArgumentParser()
        subParser = ap.add_subparsers(dest="subap", help='Advanced Options')
        editAp = subParser.add_parser("edit")
        gEditAp = editAp.add_mutually_exclusive_group(required=True)
        insertAp = subParser.add_parser("insert")
        gInsertAp = insertAp.add_mutually_exclusive_group(required=True)

        ap.add_argument("-n",
                        "--stamp-new",
                        dest="stamp_new",
                        help="Starts a new workday",
                        action="store_true")
        ap.add_argument("-p",
                        "--stamp-pause",
                        dest="stamp_pause",
                        help="Pauses the current workday",
                        action="store_true")
        ap.add_argument("-r",
                        "--stamp-resume",
                        dest="stamp_resume",
                        help="Resumes the current workday",
                        action="store_true")
        ap.add_argument("-e",
                        "--stamp-end",
                        dest="stamp_end",
                        help="Ends the workday",
                        action="store_true")
        ap.add_argument("-S",
                        "--display-saldo",
                        dest="display_saldo",
                        help="Displays the time saldo",
                        action="store_true")
        ap.add_argument("-L",
                        "--display-last",
                        dest="display_last",
                        help="Displays latest ended workday",
                        action="store_true")
        ap.add_argument("-D",
                        "--display-day",
                        dest="display_info",
                        default=None,
                        const=time.time(),
                        nargs="?",
                        metavar="dd.mm.yyyy",
                        help="Displays info about a workday")
        ap.add_argument("-W",
                        "--display-week",
                        dest="display_week",
                        default=None,
                        const=time.time(),
                        nargs="?",
                        metavar="dd.mm.yyyy",
                        help="Displays summary of week")
        ap.add_argument("-M",
                        "--display-month",
                        dest="display_month",
                        default=None,
                        const=time.time(),
                        nargs="?",
                        metavar="mm.yyyy",
                        help="Displays summary of month")
        ap.add_argument("-Y",
                        "--display-year",
                        dest="display_year",
                        default=None,
                        const=time.time(),
                        nargs="?",
                        metavar="yyyy",
                        help="Displays summary of the year")
        ap.add_argument("-X",
                        "--display-proc",
                        dest="display_proc",
                        help="Shows how long the calculation took",
                        action="store_true")
        gEditAp.add_argument(
            "-s",
            "--set-start",
            dest="set_start",
            metavar=("<HH:MM>", "dd.mm.yyyy"),
            nargs="+",
            help=
            "Set the start time from the given day. When no day is given either the last open day (1st) or the last closed day (2nd) is choosen."
        )
        gEditAp.add_argument(
            "-e",
            "--set-end",
            dest="set_end",
            metavar=("<dd.mm.yyyy:HH:MM>", "dd.mm.yyyy"),
            nargs="+",
            help=
            "Set the end time for the given day. When no day is given, then the last closed day is choosen."
        )
        gEditAp.add_argument(
            "-S",
            "--move-start",
            dest="move_start",
            metavar=("<<s/+>HH:MM>", "dd.mm.yyyy"),
            nargs="+",
            help=
            "Moves the start time from the given day (+=forward, s=backward). When no day is given either the last open day (1st) or the last closed day (2nd) is choosen."
        )
        gEditAp.add_argument(
            "-E",
            "--move-end",
            dest="move_end",
            metavar=("<<s/+>HH:MM>", "dd.mm.yyyy"),
            nargs="+",
            help=
            "Moves the end time from the given day (+=forward, s=backward). When no day is given, then the last closed day is choosen."
        )
        gInsertAp.add_argument(
            "-n",
            "--workday",
            dest="insert_workday",
            metavar=("<dd.mm.yyyy:HH:MM>", "<HH:MM>"),
            nargs=2,
            help=
            "Inserts a new workday at the given day and time with the specified length as positive offset."
        )
        gInsertAp.add_argument(
            "-b",
            "--break",
            dest="insert_break",
            metavar=("<dd.mm.yyyy>", "<dd.mm.yyyy:HH:MM>", "<HH:MM>"),
            nargs=3,
            help=
            "Inserts a new break into the given workday, starting from the given day and time, with the specified positive offset for the break end"
        )

        #gEditAp.add_argument("-b", "--insert-break", dest="insert_break", metavar=("<dd.mm.yyyy>", "<HH:MM>", "<+HH:MM>"), nargs=3, help="Insert a break into the given day, at the given time with the given offset.")

        now = time.time() * 1000
        SettingsHelper.rangesToArray()
        stamper = Stamper()
        visualizer = Visualizer()
        self.print_head()
        args = ap.parse_args()

        if args.display_saldo:
            visualizer.saldo()
        elif args.display_month:
            if (isinstance(args.display_month, str)):
                args.display_month = datetime.strptime(args.display_month,
                                                       "%m.%Y").timestamp()
            visualizer.month(args.display_month)
        elif args.display_week:
            if (isinstance(args.display_week, str)):
                args.display_week = datetime.strptime(args.display_week,
                                                      "%d.%m.%Y").timestamp()
            visualizer.week(args.display_week)
        elif args.display_year:
            if (isinstance(args.display_year, str)):
                args.display_year = datetime.strptime(args.display_year,
                                                      "%Y").timestamp()
            visualizer.year(args.display_year)
        elif args.display_last:
            visualizer.last()
        elif args.stamp_new:
            stamper.new()
        elif args.stamp_pause:
            stamper.pause()
        elif args.stamp_resume:
            stamper.resume()
        elif args.stamp_end:
            stamper.end()

        if args.display_info:
            if (isinstance(args.display_info, str)):
                ts = datetime.strptime(args.display_info,
                                       "%d.%m.%Y").timestamp()
                visualizer.day(ts)
            else:
                visualizer.ongoing()

        if args.subap == "edit":
            if args.set_start and len(args.set_start) >= 1:
                newTime = datetime.strptime(
                    args.set_start[0],
                    "%H:%M").timestamp() + 2208992400  #epoch
                ts = None
                if (len(args.set_start) >= 2):
                    ts = datetime.strptime(args.set_start[1],
                                           "%d.%m.%Y").timestamp()
                stamper.moveStart(newTime,
                                  ts=ts,
                                  visualizer=visualizer,
                                  noOffset=True)
            elif args.set_end and len(args.set_end) >= 1:
                newTime = 0
                setDirect = False
                setFromDaystart = False
                if ("." in args.set_end[0]):
                    newTime = datetime.strptime(args.set_end[0],
                                                "%d.%m.%Y:%H:%M").timestamp()
                    setDirect = True
                else:
                    newTime = datetime.strptime(
                        args.set_end[0],
                        "%H:%M").timestamp() + 2208992400  #epoch
                    setFromDaystart = True
                ts = None
                if (len(args.set_end) >= 2):
                    ts = datetime.strptime(args.set_end[1],
                                           "%d.%m.%Y").timestamp()
                stamper.moveEnd(newTime,
                                ts=ts,
                                visualizer=visualizer,
                                setDirect=setDirect,
                                setFromDaystart=setFromDaystart)
            elif args.move_start and len(args.move_start) >= 1:
                newOffsetStr = args.move_start[0][1:]
                newOffset = datetime.strptime(
                    newOffsetStr, "%H:%M").timestamp() + 2208992400  #epoch
                if (args.move_start[0][:1] == "s"):
                    newOffset = newOffset * -1
                ts = None
                if (len(args.move_start) >= 2):
                    ts = datetime.strptime(args.move_start[1],
                                           "%d.%m.%Y").timestamp()
                stamper.moveStart(newOffset, ts=ts, visualizer=visualizer)
            elif args.move_end and len(args.move_end) >= 1:
                newOffsetStr = args.move_end[0][1:]
                newOffset = datetime.strptime(
                    newOffsetStr, "%H:%M").timestamp() + 2208992400  #epoch
                if (args.move_end[0][:1] == "s"):
                    newOffset = newOffset * -1
                ts = None
                if (len(args.move_end) >= 2):
                    ts = datetime.strptime(args.move_end[1],
                                           "%d.%m.%Y").timestamp()
                stamper.moveEnd(newOffset, ts=ts, visualizer=visualizer)
            elif args.insert_break and len(args.insert_break) == 3:
                pass

        if args.subap == "insert":
            if args.insert_workday:
                insertAt = datetime.strptime(args.insert_workday[0],
                                             "%d.%m.%Y:%H:%M").timestamp()
                offset = Utils.convertHMToSeconds(args.insert_workday[1],
                                                  separator=":")
                stamper.insert_workday(insertAt,
                                       offset,
                                       setDirect=False,
                                       visualizer=visualizer)
            elif args.insert_break:
                insertAt = datetime.strptime(args.insert_break[0],
                                             "%d.%m.%Y").timestamp()
                bStart = datetime.strptime(args.insert_break[1],
                                           "%d.%m.%Y:%H:%M").timestamp()
                offset = Utils.convertHMToSeconds(args.insert_break[2],
                                                  separator=":")
                stamper.insert_break(insertAt,
                                     bStart,
                                     offset,
                                     setDirect=False,
                                     visualizer=visualizer)

        if (args.display_proc):
            self.l.info("Calculation took " + str((time.time() * 1000) - now) +
                        "ms")
Example #12
parser.add_argument('--skip',
                    type=int,
                    help='Number of units to skip before each vector',
                    default=1,
                    nargs='?')
parser.add_argument('--bound',
                    metavar='bound',
                    type=str,
                    help='Number of values to show. Eg. -10,10',
                    nargs='?',
                    default="-10,10")
parser.add_argument(
    '--prop',
    metavar='prop',
    type=int,
    help='Set this value to change the cutoff for changing color',
    default=0,
    nargs='?')

args = parser.parse_args()
if args.mode.upper() == "COLOR":
    v = Visualizer(f_x=args.fx, f_y=args.fy)
    v.plot_color(bound=tuple(map(int, args.bound.split(','))),
                 skip=args.skip,
                 prop=args.prop)
elif args.mode.upper() == "BLACK":
    v = Visualizer(f_x=args.fx, f_y=args.fy)
    v.plot(
        bound=tuple(map(int, args.boundx.split(','))),
        skip=args.skip,
    )
Example #13
    def make_plots(self):
        '''
        This function takes no inputs and returns nothing.
        Function will:
            - Load data to pandas dataframe.
            - Create bar chart of 20 most common words in the corpus.
            - Create word clouds of words relating to tweets for all the different sentiments, all topics, and the whole corpus.
            - Create bar chart of the number of tweets labeled with each sentiment.
            - Create bar chart of the number of tweets labeled with each topic.
            - All plots produced are saved as .png files to the media directory in their appropriate subdirectories.
        '''

        print("Creating Plots of the data")
        twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
        viz = Visualizer()
        dfc = DF_Cleaner()

        pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
        apple_df, google_df, ms_df, twitter_df = dfc.get_topics_df(twitter)


        # Remove stop words and perform lemmatization to create Pandas Series
        processed_docs = twitter['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
        processed_pos = pos_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
        processed_neg = neg_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
        processed_neutral = neutral_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
        processed_apple = apple_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
        processed_google = google_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
        processed_ms = ms_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
        processed_twitter = twitter_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))


        # Converting Pandas Series to numpy array
        doc_array = processed_docs.to_numpy()
        pos_doc = processed_pos.to_numpy()
        neg_doc = processed_neg.to_numpy()
        neutral_doc = processed_neutral.to_numpy()
        apple_doc = processed_apple.to_numpy()
        google_doc = processed_google.to_numpy()
        ms_doc = processed_ms.to_numpy()
        twitter_doc = processed_twitter.to_numpy()

        
        # Creating dictionary of word counts
        word_counts = dfc.wc_corpus(doc_array)
        pos_wordcounts = dfc.wc_corpus(pos_doc)
        neg_wordcounts = dfc.wc_corpus(neg_doc)
        neutral_wordcounts = dfc.wc_corpus(neutral_doc)

        
        # Converting Corpus numpy array to one giant string for word cloud
        big_string = dfc.doc_array_to_str(doc_array)
        pos_string = dfc.doc_array_to_str(pos_doc)
        neg_string = dfc.doc_array_to_str(neg_doc)
        neutral_string = dfc.doc_array_to_str(neutral_doc)
        apple_string = dfc.doc_array_to_str(apple_doc)
        google_string = dfc.doc_array_to_str(google_doc)
        ms_string = dfc.doc_array_to_str(ms_doc)
        twitter_string = dfc.doc_array_to_str(twitter_doc)


        print("creating bar plot of word counts")
        viz.plot_wc(word_counts, filepath='../media/tf/tf_whole_corpus.png', title='20 Most Common Words in Corpus')

        print("creating word clouds")
        viz.plot_wordcloud(big_string, title="All Tweets", filepath="../media/tf/word_cloud_all_tweets.png")
        viz.plot_wordcloud(pos_string, title="Positive Tweets", filepath="../media/tf/word_cloud_pos_tweets.png")
        viz.plot_wordcloud(neg_string, title="Negative Tweets", filepath="../media/tf/word_cloud_neg_tweets.png")
        viz.plot_wordcloud(neutral_string, title="Neutral Tweets", filepath="../media/tf/word_cloud_neutral_tweets.png")
        viz.plot_wordcloud(apple_string, title="Apple Tweets", filepath="../media/tf/word_cloud_apple_tweets.png")
        viz.plot_wordcloud(google_string, title="Google Tweets", filepath="../media/tf/word_cloud_google_tweets.png")
        viz.plot_wordcloud(ms_string, title="Microsoft Tweets", filepath="../media/tf/word_cloud_ms_tweets.png")
        viz.plot_wordcloud(twitter_string, title="Twitter Tweets", filepath="../media/tf/word_cloud_twitter_tweets.png")

        print("creating bar plot of sentiments")
        viz.plot_sentiments_bar()

        print("creating bar plot of categories")
        viz.plot_categories_bar()
        print('\n\n')
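make_plots leans on DF_Cleaner.preprocess for stop-word removal and lemmatization. One way it could be written, assuming gensim and nltk; the common-word list and the token-list return type are assumptions:

from gensim.parsing.preprocessing import STOPWORDS
from gensim.utils import simple_preprocess
from nltk.stem import WordNetLemmatizer

# Hypothetical list of corpus-specific words to drop when remove_common=True
COMMON_WORDS = {'apple', 'google', 'microsoft', 'twitter', 'rt'}


def preprocess(text, remove_common=False):
    # Lowercase/tokenize, drop stop words, and lemmatize each token
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(tok)
              for tok in simple_preprocess(text)
              if tok not in STOPWORDS]
    if remove_common:
        tokens = [tok for tok in tokens if tok not in COMMON_WORDS]
    return tokens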
Example #14
    ensemble_solver.solve()
    field = 0

    mean = ensemble_solver.means["value"][field]
    square = ensemble_solver.means["square"][field]

    step_errors = ensemble_solver.step_errors["value"]
    sample_errors = ensemble_solver.sample_errors["value"]

    print(f"Max step error =   {step_errors[:, -1].max()}")
    print(f"Max sample error = {sample_errors[:, -1].max()}")

    vis = Visualizer(
        mean, (0, tmax),
        lattice,
        sample_error=ensemble_solver.sample_errors["value"][field],
        step_error=ensemble_solver.step_errors["value"][field])

    vis2 = Visualizer(
        square, (0, tmax),
        lattice,
        sample_error=ensemble_solver.sample_errors["square"][field],
        step_error=ensemble_solver.step_errors["square"][field])

    mesh_points = min([30, points, steps])
    fig, ax = vis.surface(cstride=points // mesh_points,
                          rstride=int(steps / resolution) // mesh_points)

    fig2, ax2 = vis.steady_state(label="Numerical solution",
                                 marker='.',