Code Example #1
def demo2():
    cl.section('Demo 2')

    username = ''
    while not username:
        username = cl.input('Username: ')

    password = ''
    while not password:
        password = cl.password('Password: ')

    cl.success('Successfully logged in.')

    with cl.progress('Checking for update...', mode=cl.PROGRESS_SPIN):
        time.sleep(3)

    choice = ''
    while choice.lower() not in {'y', 'n'}:
        choice = cl.question(
            'A new version is present, would you like to update? (Y/N)'
        ).strip()

    if choice.lower() == 'y':
        with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
            time.sleep(1)
            p.update(0.2, ' 20% (1MB/5MB) ETA 4s')
            time.sleep(2)
            p.update(0.4, ' 40% (2MB/5MB) ETA 3s')

        cl.error('Failed to download package. SSL handshake error.')
    else:
        cl.warning('Update delayed!')
Code Example #2
def demo1():
    cl.section('Demo 1')

    cl.info('Test program started.')

    with cl.progress('Running test case 1...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 1: Passed')

    with cl.progress('Running test case 2...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 2: Passed')

    with cl.progress('Running test case 3...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 3: Passed')

    with cl.progress('Running test case 4...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.error('Test case 4: Failed')

    cl.info('Input: 1111')
    cl.info('Expected output: 2222')
    cl.info('Got: 3333')

    cl.section('Test Result')
    cl.info('3 out of 4 test cases passed.')
    cl.info('Pass rate: 75%')
Code Example #3
def animations():
    cl.section('Progress Animations')

    cl.item('Static')
    cl.progress('Downloading...')
    time.sleep(3)

    cl.item('Spin')
    with cl.progress('Downloading...', mode=cl.PROGRESS_SPIN):
        time.sleep(3)

    cl.item('Expand')
    with cl.progress('Downloading', mode=cl.PROGRESS_EXPAND):
        time.sleep(6)

    cl.item('Move')
    with cl.progress('Downloading ', mode=cl.PROGRESS_MOVE):
        time.sleep(4)

    cl.item('Determinate')
    with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
        time.sleep(1)
        p.update(0.2, ' 20% (1MB/5MB) ETA 4s')
        time.sleep(1)
        p.update(0.4, ' 40% (2MB/5MB) ETA 3s')
        time.sleep(1)
        p.update(0.6, ' 60% (3MB/5MB) ETA 2s')
        time.sleep(1)
        p.update(0.8, ' 80% (4MB/5MB) ETA 1s')
        time.sleep(1)
        p.update(1, ' 100% (5MB/5MB)')
Code Example #4
def text_preprocessor(input_filename,
                      *,
                      preprocessor_cls='TextPreprocessor',
                      custom_stop_words=None,
                      lem_ignore_patterns=None,
                      remove_duplicates=False):
    cl.section('Text Preprocessor')

    input_filename = data_source_file(input_filename)
    preprocessor_cls = globals()[preprocessor_cls]

    with TimeMeasure('preprocess_text'):
        result = preprocess_csv(input_filename,
                                preprocessor_cls=preprocessor_cls,
                                custom_stop_words=custom_stop_words,
                                lem_ignore_patterns=lem_ignore_patterns)

        if remove_duplicates:
            result = remove_duplicate_text(result)

        result = tuple(result)
        cl.info('Effective data size: %d' % len(result))

    with TimeMeasure('save_preprocessed'):
        save_preprocessed(result, input_filename)
Code Example #5
def retweets_recover(csvfilename):
    cl.section('Retweets Recover')
    cl.info('Recovering file: %s' % csvfilename)

    csvfilename = data_source_file(csvfilename)
    result = recover_from_csv(csvfilename)
    exportfilename = name_with_title_suffix(csvfilename, '-recovered')
    export_csv(result, exportfilename)
    return os.path.basename(exportfilename)
Code Example #6
def welcome():
    cl.section('ColorLabels Demo')
    cl.newline()
    cl.item('1. Overview of Labels')
    cl.item('2. Progress Animations')
    cl.item('3. Show Demo 1')
    cl.item('4. Show Demo 2')
    cl.item('5. Exit')
    cl.newline()
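
The menu printed by welcome() presumably feeds a small dispatch loop that is not part of this excerpt. A minimal sketch, assuming the choice is read with cl.input and that the demo functions shown in the other examples (overview, animations, demo1, demo2) are in scope:

def main_menu():
    # Hypothetical dispatcher tying the menu in welcome() to the demos above.
    actions = {'1': overview, '2': animations, '3': demo1, '4': demo2}
    while True:
        welcome()
        choice = cl.input('Please enter your choice: ').strip()
        if choice == '5':
            break
        if choice in actions:
            actions[choice]()
        else:
            cl.error('Invalid choice.')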
Code Example #7
def overview():
    cl.section('Overview of Labels')
    cl.success('Good job! All test cases passed!')
    cl.warning('Warning! Security update delayed!')
    cl.error('Error! Failed to write file!')
    cl.info('Server listening on port 8888.')
    cl.progress('Downloading package, please wait...')
    cl.plain('Nothing interesting.')
    cl.question('A new version is present, would you like to update? (Y/N)')
Code Example #8
def model_analyzer(modeldesc, sourcedesc, *, num_top_words=30,
                   num_top_docs=30, debug=False):
    cl.section('LDA Model Analyzer')
    cl.info('Model description: %s' % modeldesc)
    cl.info('Source description: %s' % sourcedesc)

    with TimeMeasure('load_all'):
        ldamodel, corpus, prep_items, source_texts = load_all(modeldesc,
                                                              sourcedesc)

    with TimeMeasure('analyzing'):
        prep_ids = tuple(item[0] for item in prep_items)
        dictionary = ldamodel.id2word
        num_topics = ldamodel.num_topics
        topics = [{
                    'topic_id': i,
                    'words': get_topic_words(ldamodel, i, num_top_words),
                    'popularity': 0.0,
                    'documents': collections.defaultdict(float)
                } for i in range(num_topics)]

        if debug:
            debugfilename = model_file('ldadoctopics-%s.txt' % modeldesc)
            with open(debugfilename, 'w', encoding='utf-8') as debugfile:
                for index, doc in enumerate(corpus):
                    text_id = prep_ids[index]
                    doc_topics = ldamodel.get_document_topics(doc)
                    text = source_texts[text_id].strip()
                    debugfile.write('%s -> %r, %s\n' % (text_id, doc_topics,
                                                        text))

        term_topics_cache = {}

        for word in dictionary:
            term_topics_cache[word] = ldamodel.get_term_topics(word)

        for index, doc in enumerate(corpus):
            for topic_id, prob in ldamodel.get_document_topics(doc):
                topics[topic_id]['popularity'] += prob

            for word, freq in doc:
                if word not in dictionary:
                    continue

                for topic_id, prob in term_topics_cache[word]:
                    topics[topic_id]['documents'][index] += prob * freq

        for topic in topics:
            topic['documents'] = get_topic_top_docs(topic['documents'],
                                                    num_top_docs,
                                                    prep_ids, source_texts)

        topics = sorted(topics, key=lambda x: x['popularity'], reverse=True)

    with TimeMeasure('export_markdown'):
        export_markdown(modeldesc, sourcedesc, topics)
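
get_topic_words is not defined in this excerpt. A minimal sketch, assuming it simply wraps gensim's LdaModel.show_topic:

def get_topic_words(ldamodel, topic_id, num_top_words):
    # Hypothetical helper: show_topic returns (word, probability) pairs
    # for the given topic, most probable words first.
    return ldamodel.show_topic(topic_id, topn=num_top_words)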
Code Example #9
def visualization_twlda(keyword, desc, desc_show, userinfofile, topusers=20,
                        encoding='utf-8', portable=True, open_browser=True):
    cl.section('Twitter-LDA Visualization')

    user_topic = parse_user_topic(desc, encoding=encoding)
    topic_words = parse_topic_words(desc, encoding=encoding)
    user_info = load_user_info(data_source_file(userinfofile))
    result = organize_data('test', user_topic, topic_words, user_info,
                           topusers)
    return export_html(keyword, desc_show, result, portable, open_browser)
Code Example #10
def twlda_multiple_run(num_topics_range,
                       iteration,
                       desc_prefix,
                       show_console_output=True):
    cl.section('Twitter-LDA Multiple Run')

    for topics in num_topics_range:
        cl.info('Running with %d topics' % topics)
        twitter_lda(output_desc='%s-%d' % (desc_prefix, topics),
                    topics=topics,
                    iteration=iteration,
                    show_console_output=show_console_output)
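
For illustration only, a hypothetical call scanning several topic counts could look like:

# Hypothetical usage: train Twitter-LDA with 5, 10, 15 and 20 topics.
twlda_multiple_run(range(5, 21, 5), iteration=1000, desc_prefix='python')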
Code Example #11
def data_retriever(data_source,
                   query,
                   save_filename,
                   *,
                   lang='',
                   proxy=None,
                   remove_duplicates=False,
                   twapi_max=None,
                   twapi_sleep_time=0,
                   twscrape_poolsize=20,
                   twscrape_begindate=None,
                   ghapi_org=None,
                   ghapi_since=None,
                   soapi_begindate=None):
    cl.section('Data Retriever')
    cl.info('Starting to retrieve query: %s, or org: %s' % (query, ghapi_org))
    cl.info('From data source: %s' % data_source)
    cl.info('Using proxy: %s' % proxy)
    cl.info('Remove duplicates: %s' % remove_duplicates)

    if proxy:
        os.environ['HTTP_PROXY'] = proxy
        os.environ['HTTPS_PROXY'] = proxy

    if data_source == 'twitter_standard_api':
        data = twapi_search(query,
                            twapi_max,
                            sleep_time=twapi_sleep_time,
                            lang=lang)
    elif data_source == 'twitterscraper':
        data = twscrape_search(query,
                               lang=lang,
                               poolsize=twscrape_poolsize,
                               begindate=twscrape_begindate)
    elif data_source == 'github_api':
        data = github_issue_org_fetch(ghapi_org, ghapi_since)
    elif data_source == 'stackoverflow_api':
        data = soapi_search(query, begindate=soapi_begindate)
    else:
        cl.error('Data source %r is not implemented' % data_source)
        sys.exit(-1)

    if remove_duplicates:
        data = iterator_aggregate_list(data)
        data_no_duplicate_text = remove_duplicate_text(data)
        cl.info('Exporting data without duplicate text')
        export_csv(data_no_duplicate_text, data_source_file(save_filename))

        save_filename_full = name_with_title_suffix(save_filename, '-full')
        cl.info('Exporting full data')
        export_csv(data, data_source_file(save_filename_full))
    else:
        export_csv(data, data_source_file(save_filename))
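
A hypothetical invocation, for example scraping English tweets for a keyword and exporting a de-duplicated copy alongside the full data (the query and filename here are made up):

# Hypothetical usage of the retriever above.
data_retriever('twitterscraper', 'python', 'python-tweets.csv',
               lang='en', remove_duplicates=True)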
Code Example #12
def random_sampler(csvfilename, amount):
    cl.section('Data Random Sampler')
    cl.info('Random sampling file: %s' % csvfilename)
    cl.info('Amount: %d' % amount)

    csvfilename = data_source_file(csvfilename)
    data = list(csv_reader(csvfilename))

    random.shuffle(data)
    data = data[:amount]

    exportfilename = name_with_title_suffix(csvfilename, '-sample-%d' % amount)
    export_csv(data, exportfilename)
Code Example #13
def text_preprocessor_user(sourcedesc):
    cl.section('Text Preprocessor Grouping By User')

    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', sourcedesc)

    csvfilename = data_source_file('%s.csv' % sourcedesc)

    with TimeMeasure('preprocess_text'):
        result = list(preprocess_csv(csvfilename))

    with TimeMeasure('save_preprocessed'):
        savefilename = name_with_title_suffix(csvfilename, '-user')
        export_csv(result, savefilename)
Code Example #14
def show_level_stats(level_status, items_per_row=5):
    cl.newline()
    cl.section('Level stats:')
    field_width = len(str(NUM_LEVELS))
    rows = math.ceil(NUM_LEVELS / items_per_row)
    for row in range(rows):
        cl.info(' '.join(
            show_level_block(x + 1, field_width, level_status[x])
            for x in range(row * items_per_row,
                           min((row + 1) * items_per_row, NUM_LEVELS))))
    cl.progress(f'Your progress: {sum(level_status)}/{NUM_LEVELS}')
    check_milestones(level_status)
    cl.newline()
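
show_level_block is not included in this excerpt. A minimal sketch of what such a helper might do, assuming each cell is the level number padded to field_width with a marker for solved levels (the exact format is an assumption):

def show_level_block(level, field_width, solved):
    # Hypothetical helper: render one level cell, e.g. '[ 3]' or '[*3]'.
    mark = '*' if solved else ' '
    return '[%s%*d]' % (mark, field_width, level)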
Code Example #15
def main():
    cl.section('Welcome to Python Challenges')
    cl.info(f'Python version: {PYTHON_VERSION}')
    level_status = [False] * NUM_LEVELS
    while True:
        show_level_stats(level_status)

        cl.info(f'Enter a level number (1-{NUM_LEVELS}) to solve a level, '
                'or enter 0 to view source code')
        level_number = get_level_number()

        if level_number == 0:
            print(SOURCE, end='')
            continue

        if level_status[level_number - 1]:
            cl.success('You already solved this level')
            continue

        level_func = globals()[f'level_{level_number}']
        answer = get_input(f'Your answer for level {level_number}: ')

        timer = threading.Timer(CHALLENGE_TIMEOUT, die, args=('Timeout!', ))
        timer.start()

        try:
            global_check(answer)
            answer = ast.literal_eval(answer.strip())
        except Exception:  # pylint: disable=broad-except
            timer.cancel()
            cl.error('Wrong answer')
            if DEBUG_MODE:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            level_func(answer)
        except Exception:  # pylint: disable=broad-except
            timer.cancel()
            cl.error('Wrong answer')
            if DEBUG_MODE:
                traceback.print_exc(file=sys.stdout)
            continue

        timer.cancel()
        cl.success('Correct answer')
        level_status[level_number - 1] = True
Code Example #16
def twitter_lda(*, output_desc, topics, iteration, alpha_g=None,
                beta_word=0.01, beta_b=0.01, gamma=20,
                show_console_output=True):
    cl.section('Twitter-LDA Runner')
    cl.info('Output description: %s' % output_desc)

    assert re.fullmatch(r'[-_0-9a-zA-Z]+', output_desc)

    if alpha_g is None:
        alpha_g = 50 / topics

    set_parameters(topics, alpha_g, beta_word, beta_b, gamma, iteration)

    with TimeMeasure('Twitter-LDA training'):
        run_twlda(show_console_output=show_console_output)

    move_result(output_desc)
Code Example #17
def text_preprocessor_twlda(sourcedesc,
                            *,
                            tweet_min_length=3,
                            user_min_tweets=1,
                            remove_duplicates=False):
    cl.section('Text Preprocessor For Twitter-LDA')

    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', sourcedesc)

    input_filename = data_source_file('%s.csv' % sourcedesc)

    with TimeMeasure('preprocess_text'):
        prepdata, sourcedata = preprocess_csv(input_filename, tweet_min_length,
                                              user_min_tweets,
                                              remove_duplicates)

    with TimeMeasure('save_preprocessed'):
        save_preprocessed(prepdata, sourcedata)
Code Example #18
def lda_topic_model(input_filename, keyword, size, *, num_topics,
                    iterations=50, passes=1, chunksize=2000, eval_every=10,
                    verbose=False, gamma_threshold=0.001, filter_no_below=5,
                    filter_no_above=0.5, filter_keep_n=100000,
                    open_browser=True):
    cl.section('LDA Topic Model Training')
    cl.info('Keyword: %s' % keyword)
    cl.info('Data size: %d' % size)
    cl.info('Number of topics: %d' % num_topics)
    cl.info('Iterations: %d' % iterations)
    cl.info('Passes: %d' % passes)
    cl.info('Chunk size: %d' % chunksize)
    cl.info('Eval every: %s' % eval_every)
    cl.info('Verbose: %s' % verbose)
    cl.info('Gamma Threshold: %f' % gamma_threshold)
    cl.info('Filter no below: %d' % filter_no_below)
    cl.info('Filter no above: %f' % filter_no_above)
    cl.info('Filter keep n: %d' % filter_keep_n)

    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', keyword)

    input_filename = data_source_file(input_filename)
    description = '%s-%d-%d-%dx%d-%s' % (keyword, size, num_topics, iterations,
                                         passes, time.strftime('%Y%m%d%H%M%S'))

    if verbose:
        log_filename = log_file('ldalog-%s.log' % description)
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.DEBUG, filename=log_filename)
        cl.info('Writing logs into file: %s' % log_filename)

    with TimeMeasure('load_preprocessed_text'):
        preprocessed_texts = file_read_json(input_filename)
        preprocessed_texts = [item[1] for item in preprocessed_texts]

    with TimeMeasure('gen_dict_corpus'):
        cl.progress('Generating dictionary and corpus...')

        dictionary = Dictionary(preprocessed_texts, prune_at=None)
        dictionary.filter_extremes(no_below=filter_no_below,
                                   no_above=filter_no_above,
                                   keep_n=filter_keep_n)
        dictionary.compactify()

        corpus = [dictionary.doc2bow(text) for text in preprocessed_texts]

        corpusfilename = model_file('ldacorpus-%s.json' % description)
        file_write_json(corpusfilename, corpus)
        cl.success('Corpus saved as: %s' % corpusfilename)

    with TimeMeasure('training'):
        cl.progress('Performing training...')

        with NoConsoleOutput():
            ldamodel = LdaMulticore(corpus, workers=N_WORKERS,
                                    id2word=dictionary, num_topics=num_topics,
                                    iterations=iterations, passes=passes,
                                    chunksize=chunksize, eval_every=eval_every,
                                    gamma_threshold=gamma_threshold,
                                    alpha='symmetric', eta='auto')

        cl.success('Training finished.')

    with TimeMeasure('save_model'):
        modelfilename = 'ldamodel-%s' % description
        ldamodel.save(model_file(modelfilename))
        cl.success('Model saved as: %s' % modelfilename)

    with TimeMeasure('measure_coherence'):
        cl.progress('Measuring topic coherence...')
        measure_coherence(ldamodel, preprocessed_texts, corpus, dictionary)

    with TimeMeasure('vis_save'):
        cl.progress('Preparing visualization...')
        vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
        htmlfilename = 'ldavis-%s.html' % description
        htmlfilename = report_file(htmlfilename)
        pyLDAvis.save_html(vis, htmlfilename)
        cl.success('Visualized result saved in file: %s' % htmlfilename)

    if open_browser:
        open_html_in_browser(htmlfilename)
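
measure_coherence is not included in this excerpt. A minimal sketch, assuming it uses gensim's CoherenceModel and reports the score through cl.info:

from gensim.models import CoherenceModel

def measure_coherence(ldamodel, texts, corpus, dictionary):
    # Hypothetical helper: compute and report the c_v topic coherence
    # of the trained model.
    cm = CoherenceModel(model=ldamodel, texts=texts, corpus=corpus,
                        dictionary=dictionary, coherence='c_v')
    cl.info('Topic coherence (c_v): %f' % cm.get_coherence())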
Code Example #19
def user_info_retriever(usernames, csvfilename):
    cl.section('Twitter User Info Retriever')

    csvfilename = data_source_file(csvfilename)
    result = retrieve_user_info(usernames)
    export_csv(result, csvfilename)