Example #1
    def compare_files(cls, f1, f2, keep_going=True):
        if f1 == f2:
            return Comparator(ExecutorStatus.ANSWER_CORRECT, list())

        error = list()
        output = list()
        result = True

        lines1 = io.read_file(f1).splitlines()
        lines2 = io.read_file(f2).splitlines()

        lenl1 = len(lines1)
        lenl2 = len(lines2)

        for i in range(max(lenl1, lenl2)):
            # a line missing on either side is shown as '""' in the diff
            l1 = lines1[i] if i < lenl1 else '""'
            l2 = lines2[i] if i < lenl2 else '""'
            output.append(l2)

            if l1 != l2:
                error.extend([
                    'line %d error:' % (i + 1),
                    '     expected: %s' % l1,
                    '        found: %s' % l2, ''
                ])
                result = False
                if not keep_going:
                    break

        if result:
            return Comparator(ExecutorStatus.ANSWER_CORRECT, error)
        else:
            return Comparator(ExecutorStatus.ANSWER_WRONG, error)
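A hypothetical call site for the comparator above, assuming compare_files is a classmethod of Comparator and that the returned object exposes the status and the collected error lines (the attribute names below are assumptions, not part of the snippet):

# Hypothetical usage; attribute names on the returned Comparator are assumed.
result = Comparator.compare_files('expected.txt', 'actual.txt', keep_going=True)
if result.status is not ExecutorStatus.ANSWER_CORRECT:
    print('\n'.join(result.errors))  # per-line diff built by the loop above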
Example #2
    def __init__(self, lang, pool='pool', max_len=64, lang_code=250004):
        self.context = read_pkl(f'dataset/ckgc/{lang}/context.pkl')
        self.response = read_pkl(f'dataset/ckgc/{lang}/response.pkl')
        self.knowledge = read_pkl(f'dataset/ckgc/{lang}/knowledge.pkl')
        self.pool = [[int(item) for item in line[1:-1].split(',')]
                     for line in read_file(pool)]

        self.max_len = max_len
        self.lang_code = lang_code
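The list comprehension above expects each line of the pool file to be a bracketed, comma-separated list of knowledge indices; the format itself is inferred from the slicing, not documented in the snippet. A minimal illustration of that parsing step with a literal line in place of read_file:

line = '[3, 14, 0]'
indices = [int(item) for item in line[1:-1].split(',')]  # strip brackets, split, cast
# indices == [3, 14, 0]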
Example #3
def create_report(params,
                  metrics,
                  artifact_dict,
                  artifacts_to_render=None,
                  template_path='template.mustache',
                  report_path='report.md'):
    """Generates a report from a template

    Args:
        params ():
        metrics ():
        artifact_dict ():
        artifacts_to_render(dict(str, dict(str, str))):
        template_path (str): path to template file
        report_path (str): path to report generated

    """
    if isinstance(params, ModelBuildingSessionParams):
        params = params.dict()
    if isinstance(metrics, ModelBuildingSessionMetrics):
        metrics = metrics.dict()
    if isinstance(artifact_dict, ModelBuildingSessionOutputArtifacts):
        artifact_dict = artifact_dict.dict()

    if artifacts_to_render is not None:
        for artifact, read_args in artifacts_to_render.items():
            artifact_dict[artifact] = read_file(artifact_dict[artifact],
                                                **read_args)

    params = {'_'.join(['params', k]): v for k, v in params.items()}
    metrics = {'_'.join(['metrics', k]): v for k, v in metrics.items()}
    artifact_dict = {
        '_'.join(['artifact_list', k]): v
        for k, v in artifact_dict.items()
    }

    template_inputs = dict()
    template_inputs.update(params)
    template_inputs.update(metrics)
    template_inputs.update(artifact_dict)

    for k, v in template_inputs.items():
        if isinstance(v, pd.DataFrame):
            template_inputs[k] = v.to_markdown()
        if isinstance(v, dict):
            template_inputs[k] = pprint.pformat(v, indent=4)

    with open(template_path, 'r') as f:
        report = chevron.render(f, template_inputs)

    with open(report_path, 'w') as f:
        f.write(report)
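A sketch of how create_report might be called with plain dictionaries, assuming a Mustache template that references the prefixed keys this function builds (e.g. {{params_model_name}}, {{metrics_auc}}, {{artifact_list_feature_importance}}); the values and file names are illustrative only:

import pandas as pd

params = {'model_name': 'xgboost_v1'}
metrics = {'auc': 0.87}
artifact_dict = {
    'feature_importance': pd.DataFrame({'feature': ['age'], 'gain': [0.42]})
}

create_report(params, metrics, artifact_dict,
              template_path='template.mustache',
              report_path='report.md')

DataFrame values are rendered with to_markdown() and dict values with pprint.pformat, so both can be dropped straight into the Mustache placeholders.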
Example #4
    input_path = args.i
    output_path = args.o
    size = args.size
    sep = args.sep
    name = args.name
    language = args.language

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    t = time.time()
    logging.info('Read WIKI data')
    passage = ['no_passage_used\nno_passage_used']
    doc = ''
    for file in tqdm(all_file(input_path)):
        data = read_file(file)
        for line in data:
            if '<doc id' in line:
                # flush the previously accumulated article; skip the very first
                # header, where nothing has been accumulated yet
                if doc:
                    passage.append(doc[:-1])
                doc = ''
            else:
                if line != '' and line != '</doc>':
                    doc += line + '\n'

    logging.info('Cut WIKI into sentences')
    knowledge_collect = []
    collect_num = 0
    for p in tqdm(passage):
        k = p.split('\n')
        topic = k[0]
Example #5
                if 0 not in pool:
                    pool.append(0)
                context.append(prefix)
                response.append(turn['text'])
                pools.append(pool)
            prefix = prefix + ' </s> ' + turn['text']
    write_file(context, f'dataset/ckgc/{lang}/context.txt')
    write_file(response, f'dataset/ckgc/{lang}/response.txt')
    write_file(knowledge, f'dataset/ckgc/{lang}/knowledge.txt')
    write_file(pools, f'dataset/ckgc/{lang}/pool.txt')

# input('>>>>')
# for reddit-english
data = []
data.extend(
    read_file(
        'dataset/reddit_en/reddit_conversations.3turns.train.topical.txt'))
data.extend(
    read_file('dataset/reddit_en/reddit_conversations.3turns.dev.topical.txt'))
data.extend(
    read_file(
        'dataset/reddit_en/reddit_conversations.3turns.test.topical.txt'))

context = []
response = []
for conv in tqdm(data):
    conv = conv.split('\t')
    for i in range(2):
        context.append(conv[i])
        response.append(conv[i + 1])

write_file(context, 'dataset/reddit_en/context.txt')
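Concretely, each Reddit line holds three tab-separated turns, and the inner loop above turns them into two (context, response) pairs:

conv = 'turn one\tturn two\tturn three'.split('\t')
pairs = [(conv[i], conv[i + 1]) for i in range(2)]
# [('turn one', 'turn two'), ('turn two', 'turn three')]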
Example #6
def main() -> int:
    trie = Trie()
    trie.insert(*[line.strip() for line in read_file(sys.argv[1]).split('\n')])
    return run(trie)
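For this to run, Trie.insert has to accept a variable number of words (read_file and run come from the surrounding project). A toy stand-in with that call shape, not the project's real implementation:

class Trie:
    """Minimal sketch: nested dicts with an end-of-word marker."""

    def __init__(self):
        self.root = {}

    def insert(self, *words):
        for word in words:
            node = self.root
            for ch in word:
                node = node.setdefault(ch, {})
            node['$'] = True  # marks a complete word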
Example #7
from dataset_creation.kb_crawl.crawler.crawler import Crawler

if __name__ == "__main__":
    crawlSettings = {
        'material': True,
        'relation': True,
        'property': False,
        'comparison': False,
    }
    crawler = Crawler()

    # crawler.comet_conceptnet_interact()
    # crawler.comet_atomic_interact()

    if crawlSettings['material']:
        materials = io.read_file('example_materials_input.csv')

        print('Generating Conceptnet Materials...')
        materials_knowledge1 = crawler.crawl_materials(materials)
        io.write_materials('example_materials_output.csv',
                           materials_knowledge1)

        print('Generating Comet Conceptnet Materials...')
        materials_knowledge2 = crawler.crawl_comet_materials(materials)
        io.write_materials('example_comet_materials_output.csv',
                           materials_knowledge2)

    if crawlSettings['relation']:
        relations = io.read_file('example_relations_input.csv')

        print('Generating Relations...')
Example #8
def render_plot():
    # Set up data
    if data_path == b'':
        wavelength = np.arange(3900, 4000)
        flux = np.random.sample(100)
    else:
        spectrum = read_file(data_path.decode())
        wavelength = spectrum.wavelength
        flux = spectrum.flux
        flux /= flux.max()

    # Currently only implemented for table source "example"
    linelist = query(source=line_list.decode(),
                     wavelength_min=min_wave * u.Angstrom,
                     wavelength_max=max_wave * u.Angstrom)
    ##################################################################

    # Create object spectrum data source
    source = ColumnDataSource(data=dict(wavelength=wavelength, flux=flux))

    # Create line list label source
    lines = ColumnDataSource(data=dict(x=linelist.wavelength.value,
                                       top=np.zeros_like(linelist.priority),
                                       names=linelist.species))

    # Create a set of labels for each species
    labels = LabelSet(x='x',
                      y='top',
                      text='names',
                      level='glyph',
                      x_offset=0,
                      y_offset=0,
                      source=lines,
                      render_mode='canvas',
                      angle=np.pi / 3)

    # Set up plot
    plot = figure(plot_height=600,
                  plot_width=700,
                  title="Example spectrum",
                  tools="wheel_zoom,box_zoom,pan,reset,save",
                  x_range=[wavelength.min(),
                           wavelength.max()],
                  y_range=[0, flux.max()],
                  sizing_mode='scale_width')

    # Add vertical bars for each line in the line list
    plot.vbar(x='x',
              top='top',
              source=lines,
              color="black",
              width=0.01,
              bottom=0,
              alpha=0.5)

    # Add the actual spectrum
    plot.line('wavelength',
              'flux',
              source=source,
              line_width=1,
              line_alpha=0.8)

    # Set up widgets
    nlines_slider = Slider(title="more/less lines",
                           value=10,
                           start=0,
                           end=100,
                           step=0.01)

    rv_offset = Slider(title="RV offset",
                       value=0,
                       start=-100,
                       end=100,
                       step=0.01)

    def on_slider_change(attrname, old, new):
        n_lines_scale = nlines_slider.value
        rv_offset_val = rv_offset.value
        n_lines = int(n_lines_scale / 100 * len(linelist.wavelength))
        if n_lines > 0:
            condition = linelist.priority >= np.sort(linelist.priority)[-n_lines]
        else:
            # with the slider at zero, [-0] would select everything; show no labels
            condition = np.zeros(len(linelist.priority), dtype=bool)
        label_wavelengths = linelist.wavelength.value
        label_height = condition.astype(float) * flux.max()

        label_names = linelist.species.copy()
        # Blank out some labels
        label_names[~condition] = ''

        lines.data = dict(x=label_wavelengths + rv_offset_val,
                          top=0.9 * label_height * plot.y_range.end,
                          names=label_names)

    for w in [nlines_slider, rv_offset]:
        w.on_change('value', on_slider_change)

    # Set up layouts and add to document
    inputs = widgetbox(nlines_slider, rv_offset)  #nlines_text
    plot.add_layout(labels)

    curdoc().add_root(column(inputs, plot, height=1000))
    curdoc().title = "Whose line is it anyway"
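Since the layout is attached through curdoc(), this is a Bokeh server application rather than a plain script; it would be launched with something like the following (the file name is illustrative):

# bokeh serve --show spectrum_app.py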