@classmethod
def compare_files(cls, f1, f2, keep_going=True):
    # Identical paths are trivially equal.
    if f1 == f2:
        return Comparator(ExecutorStatus.ANSWER_CORRECT, [])

    error = []
    output = []
    result = True

    lines1 = io.read_file(f1).splitlines()
    lines2 = io.read_file(f2).splitlines()
    lenl1 = len(lines1)
    lenl2 = len(lines2)

    for i in range(max(lenl1, lenl2)):
        # '""' marks a line that is missing from the shorter file.
        l1 = lines1[i] if i < lenl1 else '""'
        l2 = lines2[i] if i < lenl2 else '""'
        output.append(l2)  # accumulate the produced lines (unused below)

        if l1 != l2:
            error.extend([
                'line %d error:' % (i + 1),
                ' expected: %s' % l1,
                ' found: %s' % l2,
                ''
            ])
            result = False
            if not keep_going:
                break

    if result:
        return Comparator(ExecutorStatus.ANSWER_CORRECT, error)
    return Comparator(ExecutorStatus.ANSWER_WRONG, error)
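# --- Hedged usage sketch for compare_files (not part of the original code) ---
# Assumes Comparator(status, messages) and ExecutorStatus come from this
# project; the class and file names below are illustrative.
#
#   cmp = Executor.compare_files('expected.out', 'actual.out', keep_going=True)
#   # keep_going=True reports every mismatching line; keep_going=False stops
#   # the scan at the first difference.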
def __init__(self, lang, pool='pool', max_len=64, lang_code=250004):
    # Pre-pickled CKGC splits for the requested language.
    self.context = read_pkl(f'dataset/ckgc/{lang}/context.pkl')
    self.response = read_pkl(f'dataset/ckgc/{lang}/response.pkl')
    self.knowledge = read_pkl(f'dataset/ckgc/{lang}/knowledge.pkl')
    # Each pool line looks like "[3, 1, 4]"; strip the brackets, split on
    # commas, and parse the knowledge ids as ints.
    self.pool = [[int(item) for item in line[1:-1].split(',')]
                 for line in read_file(pool)]
    self.max_len = max_len
    self.lang_code = lang_code  # language token id (250004 is likely mBART's en_XX)
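# Minimal sketch of the pool-file format the comprehension above assumes:
# one bracketed, comma-separated id list per line. _parse_pool_line is a
# hypothetical helper that mirrors the inner comprehension.
def _parse_pool_line(line):
    return [int(item) for item in line[1:-1].split(',')]

assert _parse_pool_line('[3, 1, 4]') == [3, 1, 4]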
def create_report(params,
                  metrics,
                  artifact_dict,
                  artifacts_to_render=None,
                  template_path='template.mustache',
                  report_path='report.md'):
    """Generates a report from a mustache template.

    Args:
        params (dict or ModelBuildingSessionParams): session parameters
        metrics (dict or ModelBuildingSessionMetrics): session metrics
        artifact_dict (dict or ModelBuildingSessionOutputArtifacts):
            mapping from artifact name to artifact path or object
        artifacts_to_render (dict(str, dict(str, str))): artifacts whose
            paths should be replaced by their loaded contents, mapped to
            the kwargs passed to read_file
        template_path (str): path to the template file
        report_path (str): path of the generated report
    """
    # Normalize session objects into plain dicts.
    if isinstance(params, ModelBuildingSessionParams):
        params = params.dict()
    if isinstance(metrics, ModelBuildingSessionMetrics):
        metrics = metrics.dict()
    if isinstance(artifact_dict, ModelBuildingSessionOutputArtifacts):
        artifact_dict = artifact_dict.dict()

    # Replace selected artifact paths with their loaded contents.
    if artifacts_to_render is not None:
        for artifact, read_args in artifacts_to_render.items():
            artifact_dict[artifact] = read_file(artifact_dict[artifact], **read_args)

    # Namespace the keys so the template can address them unambiguously.
    params = {'_'.join(['params', k]): v for k, v in params.items()}
    metrics = {'_'.join(['metrics', k]): v for k, v in metrics.items()}
    artifact_dict = {
        '_'.join(['artifact_list', k]): v for k, v in artifact_dict.items()
    }

    template_inputs = dict()
    template_inputs.update(params)
    template_inputs.update(metrics)
    template_inputs.update(artifact_dict)

    # Render DataFrames as markdown tables and dicts as pretty-printed text.
    for k, v in template_inputs.items():
        if isinstance(v, pd.DataFrame):
            template_inputs[k] = v.to_markdown()
        if isinstance(v, dict):
            template_inputs[k] = pprint.pformat(v, indent=4)

    with open(template_path, 'r') as f:
        report = chevron.render(f, template_inputs)
    with open(report_path, 'w') as f:
        f.write(report)
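# Self-contained sketch of the chevron rendering step used by create_report;
# the template string and metric name are illustrative.
import chevron

demo_template = 'Accuracy: {{metrics_accuracy}}'
rendered = chevron.render(demo_template, {'metrics_accuracy': 0.93})
assert rendered == 'Accuracy: 0.93'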
input_path = args.i
output_path = args.o
size = args.size
sep = args.sep
name = args.name
language = args.language

if not os.path.exists(output_path):
    os.makedirs(output_path)

t = time.time()
logging.info('Read WIKI data')
passage = ['no_passage_used\nno_passage_used']
doc = ''
for file in tqdm(all_file(input_path)):
    data = read_file(file)
    for line in data:
        if '<doc id' in line:
            # A new <doc id=...> header: flush the previous document.
            if doc:
                passage.append(doc[:-1])  # drop the trailing newline
            doc = ''
        elif line != '' and line != '</doc>':
            doc += line + '\n'
if doc:
    passage.append(doc[:-1])  # flush the final document

logging.info('Cut WIKI to sentence')
knowledge_collect = []
collect_num = 0
for p in tqdm(passage):
    k = p.split('\n')
    topic = k[0]
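# Tiny worked example of the <doc id=...> parsing loop above, run on
# illustrative lines (real input is WikiExtractor-style dump text):
_demo = ['<doc id="1">', 'Topic A', 'Body 1.', '</doc>',
         '<doc id="2">', 'Topic B', 'Body 2.', '</doc>']
_passages, _doc = [], ''
for _line in _demo:
    if '<doc id' in _line:
        if _doc:
            _passages.append(_doc[:-1])
        _doc = ''
    elif _line != '' and _line != '</doc>':
        _doc += _line + '\n'
if _doc:
    _passages.append(_doc[:-1])
assert _passages == ['Topic A\nBody 1.', 'Topic B\nBody 2.']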
        if 0 not in pool:
            pool.append(0)
        context.append(prefix)
        response.append(turn['text'])
        pools.append(pool)
        prefix = prefix + ' </s> ' + turn['text']

write_file(context, f'dataset/ckgc/{lang}/context.txt')
write_file(response, f'dataset/ckgc/{lang}/response.txt')
write_file(knowledge, f'dataset/ckgc/{lang}/knowledge.txt')
write_file(pools, f'dataset/ckgc/{lang}/pool.txt')
# input('>>>>')

# for reddit-english
data = []
data.extend(
    read_file(
        'dataset/reddit_en/reddit_conversations.3turns.train.topical.txt'))
data.extend(
    read_file('dataset/reddit_en/reddit_conversations.3turns.dev.topical.txt'))
data.extend(
    read_file(
        'dataset/reddit_en/reddit_conversations.3turns.test.topical.txt'))

context = []
response = []
for conv in tqdm(data):
    conv = conv.split('\t')
    for i in range(2):
        context.append(conv[i])
        response.append(conv[i + 1])
write_file(context, f'dataset/reddit_en/context.txt')
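# Sketch of how one 3-turn Reddit line is expanded into two
# (context, response) pairs by the loop above (utterances are illustrative):
_conv = 'hi there\thello!\thow are you?'.split('\t')
_pairs = [(_conv[i], _conv[i + 1]) for i in range(2)]
assert _pairs == [('hi there', 'hello!'), ('hello!', 'how are you?')]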
def main() -> int:
    trie = Trie()
    # Load the word list (one entry per line) into the trie.
    trie.insert(*[line.strip() for line in read_file(sys.argv[1]).split('\n')])
    return run(trie)
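# Usage sketch (script name illustrative): the word list is read from the
# first CLI argument, one word per line.
#
#   $ python solver.py wordlist.txt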
from dataset_creation.kb_crawl.crawler.crawler import Crawler
# NOTE: `io` below is this project's I/O helper (read_file / write_materials),
# not the standard library `io`; its import is omitted in this snippet.

if __name__ == "__main__":
    crawlSettings = {
        'material': True,
        'relation': True,
        'property': False,
        'comparison': False,
    }

    crawler = Crawler()
    # crawler.comet_conceptnet_interact()
    # crawler.comet_atomic_interact()

    if crawlSettings['material']:
        materials = io.read_file('example_materials_input.csv')

        print('Generating Conceptnet Materials...')
        materials_knowledge1 = crawler.crawl_materials(materials)
        io.write_materials('example_materials_output.csv', materials_knowledge1)

        print('Generating Comet Conceptnet Materials...')
        materials_knowledge2 = crawler.crawl_comet_materials(materials)
        io.write_materials('example_comet_materials_output.csv',
                           materials_knowledge2)

    if crawlSettings['relation']:
        relations = io.read_file('example_relations_input.csv')
        print('Generating Relations...')
def render_plot():
    # Set up data
    if data_path == b'':
        # No file supplied: fall back to a random demo spectrum.
        wavelength = np.arange(3900, 4000)
        flux = np.random.sample(100)
    else:
        spectrum = read_file(data_path.decode())
        wavelength = spectrum.wavelength
        flux = spectrum.flux
        flux /= flux.max()

    # Currently only implemented for table source "example"
    linelist = query(source=line_list.decode(),
                     wavelength_min=min_wave * u.Angstrom,
                     wavelength_max=max_wave * u.Angstrom)

    ##################################################################
    # Create object spectrum data source
    source = ColumnDataSource(data=dict(wavelength=wavelength, flux=flux))

    # Create line list label source
    lines = ColumnDataSource(data=dict(x=linelist.wavelength.value,
                                       top=np.zeros_like(linelist.priority),
                                       names=linelist.species))

    # Create a set of labels for each species
    labels = LabelSet(x='x', y='top', text='names', level='glyph',
                      x_offset=0, y_offset=0, source=lines,
                      render_mode='canvas', angle=np.pi / 3)

    # Set up plot
    plot = figure(plot_height=600, plot_width=700,
                  title="Example spectrum",
                  tools="wheel_zoom,box_zoom,pan,reset,save",
                  x_range=[wavelength.min(), wavelength.max()],
                  y_range=[0, flux.max()],
                  sizing_mode='scale_width')

    # Add vertical bars for each line in the line list
    plot.vbar(x='x', top='top', source=lines, color="black",
              width=0.01, bottom=0, alpha=0.5)

    # Add the actual spectrum
    plot.line('wavelength', 'flux', source=source,
              line_width=1, line_alpha=0.8)

    # Set up widgets
    nlines_slider = Slider(title="more/less lines", value=10,
                           start=0, end=100, step=0.01)
    rv_offset = Slider(title="RV offset", value=0,
                       start=-100, end=100, step=0.01)

    def on_slider_change(attrname, old, new):
        n_lines_scale = nlines_slider.value
        rv_offset_val = rv_offset.value
        # Keep only the n_lines highest-priority lines.
        n_lines = int(n_lines_scale / 100 * len(linelist.wavelength))
        condition = linelist.priority >= np.sort(linelist.priority)[-n_lines]

        label_wavelengths = linelist.wavelength.value
        label_height = condition.astype(float) * flux.max()
        label_names = linelist.species.copy()

        # Blank out some labels
        label_names[~condition] = ''

        lines.data = dict(x=label_wavelengths + rv_offset_val,
                          top=0.9 * label_height * plot.y_range.end,
                          names=label_names)

    for w in [nlines_slider, rv_offset]:
        w.on_change('value', on_slider_change)

    # Set up layouts and add to document
    inputs = widgetbox(nlines_slider, rv_offset)
    plot.add_layout(labels)
    curdoc().add_root(column(inputs, plot, height=1000))
    curdoc().title = "Whose line is it anyway"
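# This builds a Bokeh server app (it mutates curdoc()), so it is run with the
# bokeh CLI rather than plain python (the file name is illustrative):
#
#   $ bokeh serve --show spectrum_app.py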