def _write_user_files(output_dir, data):
    """Recreate ``output_dir`` and write one ``<user>.txt`` file per entry.

    Args:
        output_dir: Folder to wipe and repopulate.
        data: Mapping of user name to an iterable of lines for that user.
    """
    shutil.rmtree(output_dir, ignore_errors=True)
    # makedirs (without exist_ok) also creates missing parent folders, but
    # still fails loudly if the stale folder could not be removed above.
    os.makedirs(output_dir)

    for user, tweets in data.items():
        output_filename = os.path.join(output_dir,
                                       sanitize_filename(user) + '.txt')
        file_write_lines(output_filename, tweets)


def save_preprocessed(prepdata, sourcedata):
    """Save preprocessed and original tweets as per-user text files.

    The preprocessed tweets go into the folder returned by
    ``twlda_prep_file('test')`` and a manifest listing the written files is
    stored at ``twlda_data_file('filelist_test.txt')``. The grouped original
    tweets go into the folder returned by ``twlda_source_file('test')``.
    Both folders are deleted and recreated on every call.

    Args:
        prepdata: Mapping of user name to preprocessed tweet lines.
        sourcedata: Mapping of user name to original tweet lines.
    """
    prep_dir = twlda_prep_file('test')
    _write_user_files(prep_dir, prepdata)

    manifest_filename = twlda_data_file('filelist_test.txt')
    # os.listdir returns entries in arbitrary order; sort so the manifest is
    # reproducible across runs and platforms.
    file_write_lines(manifest_filename, sorted(os.listdir(prep_dir)))

    cl.success('Preprocessed result saved in folder: %s' % prep_dir)

    source_dir = twlda_source_file('test')
    _write_user_files(source_dir, sourcedata)

    cl.success('Grouped original tweets saved in folder: %s' % source_dir)
Ejemplo n.º 2
0
def demo2():
    """Run an interactive demo: a fake login followed by a fake update check.

    Prompts until a non-empty username and password are entered, shows a
    spinner, asks a yes/no question and then either simulates a failing
    download or reports that the update was delayed.
    """
    cl.section('Demo 2')

    # NOTE(review): the two prompt loops and the login message below were
    # reconstructed from a redaction-damaged source (the text between the
    # prompt strings was replaced by '******'); confirm the exact calls,
    # in particular the trailing .strip(), against the upstream demo.
    username = ''
    while not username:
        username = cl.input('Username: ').strip()

    password = ''
    while not password:
        password = cl.password('Password: ').strip()

    cl.success('Successfully logged in.')

    with cl.progress('Checking for update...', mode=cl.PROGRESS_SPIN):
        time.sleep(3)

    # Keep asking until the answer is exactly 'y' or 'n' (case-insensitive).
    choice = ''
    while choice.lower() not in {'y', 'n'}:
        choice = cl.question(
            'A new version is present, would you like to update? (Y/N)').strip(
            )

    if choice.lower() == 'y':
        with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
            time.sleep(1)
            p.update(0.2, ' 20% (1MB/5MB) ETA 4s')
            time.sleep(2)
            p.update(0.4, ' 40% (2MB/5MB) ETA 3s')

        cl.error('Failed to download package. SSL handshake error.')
    else:
        cl.warning('Update delayed!')
Ejemplo n.º 3
0
def export_csv(data, outfilename, encoding='utf-8'):
    """Write an iterable of dict records to a CSV file.

    The keys of the first record become the CSV header. If ``data`` yields
    nothing, a warning is printed and no file is created. A Ctrl-C while
    records are being consumed stops the export early; what was written so
    far is kept.

    Args:
        data: Iterable of dict-like records.
        outfilename: Path of the CSV file to create.
        encoding: Text encoding of the output file.
    """
    cl.progress('Exporting data to csv file: %s' % outfilename)

    nothing = object()
    records = iter(data)
    head = next(records, nothing)
    if head is nothing:
        cl.warning('Empty data. Export aborted.')
        return

    # The first record is already accounted for.
    written = 1

    with open(outfilename, 'w', newline='', encoding=encoding) as out:
        writer = csv.DictWriter(out, fieldnames=head.keys())
        writer.writeheader()
        writer.writerow(head)

        try:
            # The counter advances as soon as a record is fetched, before it
            # is handed to the writer.
            for written, row in enumerate(records, start=2):
                writer.writerow(row)
        except KeyboardInterrupt:
            cl.warning('User hit Ctrl-C, flushing data...')

    cl.success('%d record(s) saved to csv file.' % written)
def export_html(keyword, desc, data, portable, open_browser):
    """Export visualization data, either as ``data.js`` or a standalone page.

    Args:
        keyword: Stored under the ``keyword`` key of the exported JSON.
        desc: Stored under ``description``; also part of the report name.
        data: Stored under the ``topics`` key.
        portable: If true, embed the JSON into a copy of ``template.html``
            and save it as a report file; otherwise only write ``data.js``.
        open_browser: If true (portable mode only), open the saved report.

    Returns:
        The report file name in portable mode, otherwise ``None``.
    """
    payload = {
        'keyword': keyword,
        'description': desc,
        'topics': data,
        'currentWord': {'text': []},
        'currentUser': {'info': {}, 'text': []}
    }
    data_js = 'var data = ' + json.dumps(payload, separators=(',', ':'))

    if not portable:
        file_write_contents(visual_file('data.js'), data_js)
        cl.success('Visualization data saved as: data.js')
        return None

    # Drop the external data.js reference and inline the JSON instead.
    page = file_read_contents(visual_file('template.html'))
    page = re.sub(r'<script src="data.js">(.*)', '', page)
    page = re_sub_literal(r'var data =(.*)', data_js, page)

    timestamp = time.strftime('%Y%m%d%H%M%S')
    reportfilename = 'ldavisual-%s-%s.html' % (desc, timestamp)
    reportpath = report_file(reportfilename)
    file_write_contents(reportpath, page)
    cl.success('Visualization saved as: %s' % reportfilename)

    if open_browser:
        open_html_in_browser(reportpath)

    return reportfilename
Ejemplo n.º 5
0
def overview():
    """Print one sample message for each colorlabels label type."""
    cl.section('Overview of Labels')

    samples = (
        (cl.success, 'Good job! All test cases passed!'),
        (cl.warning, 'Warning! Security update delayed!'),
        (cl.error, 'Error! Failed to write file!'),
        (cl.info, 'Server listening on port 8888.'),
        (cl.progress, 'Downloading package, please wait...'),
        (cl.plain, 'Nothing interesting.'),
        (cl.question,
         'A new version is present, would you like to update? (Y/N)'),
    )
    for show, message in samples:
        show(message)
Ejemplo n.º 6
0
def run_linter(linter: Linter) -> bool:
    """Run one linter command and report whether it exited with status 0.

    Args:
        linter: Mapping with a ``name`` and the ``command`` argument list.

    Returns:
        True if the command's exit status was 0, otherwise False.
    """
    name = linter['name']
    cl.progress('Running linter {}'.format(name))

    passed = subprocess.call(linter['command']) == 0  # nosec
    if passed:
        cl.success('Linter {} success'.format(name))
    else:
        cl.error('Linter {} failed'.format(name))
    return passed
Ejemplo n.º 7
0
def win():
    """Congratulate the player, run a callback they choose, then say bye."""
    cl.success('Congratulations! You solved all the challenges!')

    gift_message = (
        "Now here is a gift for you. You can choose a callback function to call "
        "(e.g. try 'rainbow_fart')! Hope you can find the final flag through this!"
    )
    cl.info(gift_message)

    chosen = get_callback()
    cl.progress('Executing callback {!r} for you...'.format(chosen))
    # SECURITY: this executes text obtained from get_callback() as code.
    # NOTE(review): looks intentional for the challenge; confirm that
    # get_callback() restricts what can be entered.
    exec('{}()'.format(chosen))  # pylint: disable=exec-used # nosec
    bye()
    def wrapper(*args, **kwargs):
        # Runs ``func`` under a spinner and reports pass/fail afterwards.
        # ``func`` comes from the enclosing scope, which is not visible here
        # (presumably a test-case decorator -- TODO confirm). The result of
        # ``func`` is discarded; any exception is reported and re-raised.
        spinner = cl.progress(f"Running test case '{func.__name__}'...",
                              cl.PROGRESS_SPIN,
                              erase=True)

        try:
            func(*args, **kwargs)
        except BaseException:
            spinner.stop()
            cl.error(f"Test case '{func.__name__}' failed.")
            raise

        spinner.stop()
        cl.success(f"Test case '{func.__name__}' passed.")
Ejemplo n.º 9
0
def check_milestones(level_status):
    """Print the flag earned for the current number of solved levels.

    Solving every level prints FLAG5 and calls ``win()``. Otherwise the
    first threshold (80%, 60%, 40%, 20% of NUM_LEVELS, rounded) that the
    solved count equals exactly decides which flag is printed; any other
    count prints nothing.

    Args:
        level_status: Sequence of booleans, one per level.
    """
    solved = sum(level_status)

    if solved == NUM_LEVELS:
        cl.success(FLAG5)
        cl.newline()
        win()
        return

    # Checked from the highest threshold down; only the first match counts.
    thresholds = ((0.8, FLAG4), (0.6, FLAG3), (0.4, FLAG2), (0.2, FLAG1))
    for fraction, flag in thresholds:
        if solved == round(fraction * NUM_LEVELS):  # pylint: disable=round-builtin
            cl.success(flag)
            return
Ejemplo n.º 10
0
def compress_report_files(tag, report_files):
    """Archive report files into one zip and delete the originals.

    The process temporarily changes its working directory to the folder
    returned by ``report_file('')`` so that the given names resolve there
    and are stored in the archive under those same names. Afterwards the
    working directory is set to the directory containing this source file,
    even when archiving fails.

    Args:
        tag: Text inserted into the archive name ``ldareport-<tag>.zip``.
        report_files: File names, relative to the report folder.

    Returns:
        The archive file name (relative to the report folder).
    """
    os.chdir(report_file(''))
    zipfilename = 'ldareport-%s.zip' % tag

    try:
        with zipfile.ZipFile(zipfilename, 'w', zipfile.ZIP_DEFLATED) as zf:
            for f in report_files:
                zf.write(f)

        # Removing the originals is best-effort; a leftover file must not
        # invalidate the archive that was just written.
        for f in report_files:
            with contextlib.suppress(Exception):
                os.remove(f)
    finally:
        # Always leave the report folder again, so an error while zipping
        # does not leave the process stranded in it.
        os.chdir(os.path.dirname(os.path.abspath(__file__)))

    cl.success('Report files archived into: %s' % zipfilename)
    return zipfilename
Ejemplo n.º 11
0
def main():
    """Run the interactive challenge loop.

    Each round shows the level statistics, asks for a level number (0 prints
    the source code), reads an answer, and checks it while a timer is
    running that calls ``die('Timeout!')`` after CHALLENGE_TIMEOUT seconds.
    Any exception raised while validating, parsing or checking the answer
    counts as a wrong answer. The loop never returns on its own.
    """
    cl.section('Welcome to Python Challenges')
    cl.info(f'Python version: {PYTHON_VERSION}')
    solved = [False] * NUM_LEVELS

    while True:
        show_level_stats(solved)

        cl.info(f'Enter a level number (1-{NUM_LEVELS}) to solve a level, '
                'or enter 0 to view source code')
        chosen = get_level_number()

        if chosen == 0:
            print(SOURCE, end='')
            continue

        slot = chosen - 1
        if solved[slot]:
            cl.success('You already solved this level')
            continue

        checker = globals()[f'level_{chosen}']
        raw_answer = get_input(f'Your answer for level {chosen}: ')

        watchdog = threading.Timer(CHALLENGE_TIMEOUT, die, args=('Timeout!', ))
        watchdog.start()

        try:
            global_check(raw_answer)
            # The answer is parsed as a Python literal, never evaluated.
            checker(ast.literal_eval(raw_answer.strip()))
        except Exception:  # pylint: disable=broad-except
            watchdog.cancel()
            cl.error('Wrong answer')
            if DEBUG_MODE:
                traceback.print_exc(file=sys.stdout)
        else:
            watchdog.cancel()
            cl.success('Correct answer')
            solved[slot] = True
def export_markdown(modeldesc, sourcedesc, topics):
    """Write a Markdown analysis report for a list of topics.

    The report is saved as ``ldaanl-<modeldesc>.md`` at the path returned by
    ``report_file``. For every topic it lists the rank heading, the topic
    id, the words and one bullet per (stripped) document text.

    Args:
        modeldesc: Model description; also part of the file name.
        sourcedesc: Source description written into the header.
        topics: Iterable of mappings with the keys ``topic_id``, ``words``
            and ``documents``.
    """
    report_path = report_file('ldaanl-%s.md' % modeldesc)

    with open(report_path, 'w', encoding='utf-8') as report:
        emit = report.write

        emit('# Topic Model Analysis\n\n')
        emit('- Model description: %s\n' % modeldesc)
        emit('- Source description: %s\n' % sourcedesc)

        for position, topic in enumerate(topics, start=1):
            emit('\n## %s Topic\n\n' % rank(position))
            emit('ID: %d\n\n' % topic['topic_id'])
            emit('Words: %s\n\n' % topic['words'])

            for document in topic['documents']:
                emit('- %s\n' % document.strip())

    cl.success('Analysis file saved as: %s' % report_path)
Ejemplo n.º 13
0
def demo1():
    """Run a scripted demo of a test run: three passes and one failure."""
    cl.section('Demo 1')

    cl.info('Test program started.')

    # (case number, label function used for the verdict, verdict text)
    script = (
        (1, cl.success, 'Passed'),
        (2, cl.success, 'Passed'),
        (3, cl.success, 'Passed'),
        (4, cl.error, 'Failed'),
    )
    for number, report, verdict in script:
        with cl.progress('Running test case %d...' % number,
                         cl.PROGRESS_SPIN, erase=True):
            time.sleep(3)
        report('Test case %d: %s' % (number, verdict))

    # Details of the failing case.
    for detail in ('Input: 1111', 'Expected output: 2222', 'Got: 3333'):
        cl.info(detail)

    cl.section('Test Result')
    for line in ('3 out of 4 test cases passed.', 'Pass rate: 75%'):
        cl.info(line)
Ejemplo n.º 14
0
import sys

import colorlabels as cl

from service.auth import register
from util import validate_password, validate_username

if __name__ == '__main__':
    # Command-line entry point: register a user given as "username password".
    if len(sys.argv) < 3:
        cl.warning('Usage: %s username password' % sys.argv[0])
        sys.exit(-1)

    username, password = sys.argv[1:3]

    # A validator result is tested for truthiness and, when falsy, its str()
    # is shown as the error message.
    checks = (
        (validate_username, username),
        (validate_password, password),
    )
    for validate, candidate in checks:
        r = validate(candidate)
        if not r:
            cl.error(str(r))
            sys.exit(-1)

    if not register(username, password):
        cl.error('User %r already exists!' % username)
    else:
        cl.success('Successfully registered user %r.' % username)
Ejemplo n.º 15
0
    {
        'name': 'Bandit',
        'command': ['bandit', '-c', 'bandit.yml', '-r', '.'],
    },
    {
        'name': 'Vermin',
        'command': ['vermin', '.'],
    },
]  # type: List[Linter]


def run_linter(linter: Linter) -> bool:
    """Run one linter command and report whether it exited with status 0.

    Args:
        linter: Mapping with a ``name`` and the ``command`` argument list.

    Returns:
        True if the command's exit status was 0, otherwise False.
    """
    name = linter['name']
    cl.progress('Running linter {}'.format(name))

    exit_status = subprocess.call(linter['command'])  # nosec
    if exit_status != 0:
        cl.error('Linter {} failed'.format(name))
        return False

    cl.success('Linter {} success'.format(name))
    return True


# Run every linter before looking at any result, so that the output of all
# of them is shown even when an early one fails.
all_results = list(map(run_linter, linters))  # type: List[bool]

if not all(all_results):
    cl.error('Some linters failed, check output for more information')
    sys.exit(1)

cl.success('All linters success')
def save_preprocessed(data, csvfilename):
    """Save preprocessed data as JSON next to the source CSV file.

    The output name is ``csvfilename`` with its extension replaced by
    ``.prep.json`` (via ``name_replace_ext``).

    Args:
        data: JSON-serializable preprocessed data.
        csvfilename: Name of the CSV file the data was derived from.
    """
    target = name_replace_ext(csvfilename, '.prep.json')
    file_write_json(target, data)
    cl.success('Preprocessed result saved as: {}'.format(target))
Ejemplo n.º 17
0
def lda_topic_model(input_filename, keyword, size, *, num_topics,
                    iterations=50, passes=1, chunksize=2000, eval_every=10,
                    verbose=False, gamma_threshold=0.001, filter_no_below=5,
                    filter_no_above=0.5, filter_keep_n=100000,
                    open_browser=True):
    """Train an LDA topic model and save the corpus, model and visualization.

    Steps, each wrapped in a ``TimeMeasure`` scope: load the preprocessed
    texts from JSON, build a dictionary and bag-of-words corpus (the corpus
    is saved as JSON), train with ``LdaMulticore``, save the model, measure
    topic coherence, and save a pyLDAvis HTML page. All artifact names share
    one description string built from keyword, size, num_topics, iterations,
    passes and the current timestamp.

    Args:
        input_filename: Name passed to ``data_source_file`` to locate the
            preprocessed JSON input.
        keyword: Label used in the description string; must consist only of
            letters, digits, ``-``, ``_`` and ``+``.
        size: Data size; only reported and used in the description string.
        num_topics: Number of topics (keyword-only, required).
        iterations, passes, chunksize, eval_every, gamma_threshold: Passed
            through to ``LdaMulticore``.
        verbose: If true, configure DEBUG-level logging into a log file.
        filter_no_below, filter_no_above, filter_keep_n: Passed through to
            ``Dictionary.filter_extremes`` as ``no_below``, ``no_above`` and
            ``keep_n``.
        open_browser: If true, open the saved HTML page when done.

    Raises:
        AssertionError: If ``keyword`` contains other characters. Note that
            this check is an ``assert`` and is skipped under ``python -O``.
    """
    cl.section('LDA Topic Model Training')
    cl.info('Keyword: %s' % keyword)
    cl.info('Data size: %d' % size)
    cl.info('Number of topics: %d' % num_topics)
    cl.info('Iterations: %d' % iterations)
    cl.info('Passes: %d' % passes)
    cl.info('Chunk size: %d' % chunksize)
    cl.info('Eval every: %s' % eval_every)
    cl.info('Verbose: %s' % verbose)
    cl.info('Gamma Threshold: %f' % gamma_threshold)
    cl.info('Filter no below: %d' % filter_no_below)
    cl.info('Filter no above: %f' % filter_no_above)
    cl.info('Filter keep n: %d' % filter_keep_n)

    # The keyword ends up in several file names below, so only a restricted
    # character set is accepted.
    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', keyword)

    input_filename = data_source_file(input_filename)
    description = '%s-%d-%d-%dx%d-%s' % (keyword, size, num_topics, iterations,
                                         passes, time.strftime('%Y%m%d%H%M%S'))

    if verbose:
        log_filename = log_file('ldalog-%s.log' % description)
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.DEBUG, filename=log_filename)
        cl.info('Writing logs into file: %s' % log_filename)

    with TimeMeasure('load_preprocessed_text'):
        preprocessed_texts = file_read_json(input_filename)
        # Only the second element of every loaded item is kept
        # (presumably the token list of a document -- TODO confirm).
        preprocessed_texts = [item[1] for item in preprocessed_texts]

    with TimeMeasure('gen_dict_corpus'):
        cl.progress('Generating dictionary and corpus...')

        dictionary = Dictionary(preprocessed_texts, prune_at=None)
        dictionary.filter_extremes(no_below=filter_no_below,
                                   no_above=filter_no_above,
                                   keep_n=filter_keep_n)
        dictionary.compactify()

        corpus = [dictionary.doc2bow(text) for text in preprocessed_texts]

        corpusfilename = model_file('ldacorpus-%s.json' % description)
        file_write_json(corpusfilename, corpus)
        cl.success('Corpus saved as: %s' % corpusfilename)

    with TimeMeasure('training'):
        cl.progress('Performing training...')

        # Training runs inside NoConsoleOutput (presumably to silence the
        # library's console output -- TODO confirm).
        with NoConsoleOutput():
            ldamodel = LdaMulticore(corpus, workers=N_WORKERS,
                                    id2word=dictionary, num_topics=num_topics,
                                    iterations=iterations, passes=passes,
                                    chunksize=chunksize, eval_every=eval_every,
                                    gamma_threshold=gamma_threshold,
                                    alpha='symmetric', eta='auto')

        cl.success('Training finished.')

    with TimeMeasure('save_model'):
        modelfilename = 'ldamodel-%s' % description
        ldamodel.save(model_file(modelfilename))
        cl.success('Model saved as: %s' % modelfilename)

    with TimeMeasure('measure_coherence'):
        cl.progress('Measuring topic coherence...')
        measure_coherence(ldamodel, preprocessed_texts, corpus, dictionary)

    with TimeMeasure('vis_save'):
        cl.progress('Preparing visualization...')
        vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
        htmlfilename = 'ldavis-%s.html' % description
        htmlfilename = report_file(htmlfilename)
        pyLDAvis.save_html(vis, htmlfilename)
        cl.success('Visualized result saved in file: %s' % htmlfilename)

    if open_browser:
        open_html_in_browser(htmlfilename)