コード例 #1
0
def crawl_pages(subcats):
    """Collect the pages of every sub-category, using on-disk caches.

    For each entry of *subcats* a directory is created below
    "data/site/<wiki_lang>/<start_cat>/".  If that directory already holds a
    "pages.txt" it is read, otherwise the pages come from
    get_subcat_pages() and are written to that file.  Progress is reported
    through pb.update().

    Returns the keys of an OrderedDict holding, in first-seen order and
    without duplicates, the pages that are not rejected by config.page_bl()
    and are accepted by lang.can().
    """
    root = "data/site/%s/%s/" % (config.wiki_lang, config.start_cat)
    collected = []

    for position, subcat in enumerate(subcats, 1):
        pb.update(position, len(subcats))

        target_dir = root + subcat + "/"
        misc.mkdir_p(target_dir)

        cache_file = target_dir + "pages.txt"
        if not os.path.exists(cache_file):
            found = get_subcat_pages(subcat)
            misc.write_file(cache_file, found)
        else:
            found = misc.read_file(cache_file)

        collected += found

    # An ordered mapping keeps the first occurrence of each accepted page.
    unique = OrderedDict()
    for page in collected:
        if config.page_bl(page):
            continue
        if lang.can(page):
            unique[page] = None
    return unique.keys()
コード例 #2
0
ファイル: parameters_misc.py プロジェクト: yinxx/Octavo
def read_parameter_file(path, name):
    """Load a parameter file and return it parsed as a Python literal.

    The file is read with misc.read_file(path, name + filename_extension).
    When that raises IOError the text "{}" is used instead, so the result is
    an empty dict.  The text is parsed with ast.literal_eval().
    """
    try:
        literal_text = misc.read_file(path, name + filename_extension)
    except IOError:
        # Unreadable file: fall back to an empty dict literal.
        literal_text = "{}"
    return ast.literal_eval(literal_text)
コード例 #3
0
def translator(target='', detail=0):
	from vivaldi_translator import test
	
	if target != '':
		test_input = read_file(target)
		test(test_input, test_set=False, detail=detail)
	else:
		path = 'test_set/'
		tests = read_file(path+'test_list')
		test_list = get_test_list(tests)
		
		for test_name in test_list:
			test_data = get_test_data(path+test_name, ['test_input','test_output','target'])
			
			print "TEST:", test_name
			flag = test(test_data, detail=detail)
			if flag == False:
				return False
	return True
コード例 #4
0
def test_vi2cu_translator(target='', detail=0):
	from vi2cu_translator.main import test
	if target != '':
		test_input = read_file(target)
		flag = test(test_input, test_set=False, detail=detail)
	else:
		path = 'vi2cu_translator/test_set/'

		tests = read_file(path+'test_list')
		test_list = get_test_list(tests)
		
		for test_name in test_list:
			test_data = get_test_data(path+test_name, ['test_input','test_output','dtype_dict','return_dtype'])
		
			print "TEST:", test_name
			flag = test(test_data)
			if flag == False:
				return False
					
	return True
コード例 #5
0
def preprocessing(target='', detail=0):
	from preprocessing.main import test
	
	if target != '':
		test_input = read_file(target)
		flag = test(test_input, test_set=False, detail=detail)
	else:
		path = 'preprocessing/test_set/'
		
		tests = read_file(path+'test_list')
		test_list = get_test_list(tests)
		
		for test_name in test_list:
			test_data = get_test_data(path+test_name, ['test_input','test_output'])
			
			print "TEST:", test_name
			flag = test(test_data)
			if flag == False:
				return False
	return True
コード例 #6
0
def translator(target='', detail=0):
    from vivaldi_translator import test

    if target != '':
        test_input = read_file(target)
        test(test_input, test_set=False, detail=detail)
    else:
        path = 'test_set/'
        tests = read_file(path + 'test_list')
        test_list = get_test_list(tests)

        for test_name in test_list:
            test_data = get_test_data(path + test_name,
                                      ['test_input', 'test_output', 'target'])

            print "TEST:", test_name
            flag = test(test_data, detail=detail)
            if flag == False:
                return False
    return True
コード例 #7
0
ファイル: crawler.py プロジェクト: Leeyp/WiktionaryCrawler
def crawl_subcats():
	"""Return the sub-categories of config.start_cat, cached on disk.

	The cache file is "data/site/<start_cat>/subcats.txt".  When it exists
	its contents (as returned by misc.read_file) are returned.  Otherwise
	the sub-categories are fetched with get_subcats(), reduced to those
	accepted by lang.can_subcat(), written to the cache file and returned.
	"""
	dirpath = "data/site/%s/" % config.start_cat
	# Probe exactly the file that is written below.  The previous probe path
	# contained a literal, never-substituted "%s/" component, so the cache
	# was never found and the sub-categories were re-fetched on every call.
	filepath = dirpath + "subcats.txt"

	if os.path.exists(filepath):
		return misc.read_file(filepath)

	subcats = [subcat for subcat in get_subcats(config.start_cat)
			if lang.can_subcat(subcat)]
	misc.write_file(filepath, subcats)
	return subcats
コード例 #8
0
def crawl_subcats():
    """Return the sub-categories of config.start_cat, cached on disk.

    The cache file is "data/site/<start_cat>/subcats.txt".  When it exists
    its contents (as returned by misc.read_file) are returned.  Otherwise
    the sub-categories are fetched with get_subcats(), reduced to those
    accepted by lang.can_subcat(), written to the cache file and returned.
    """
    dirpath = "data/site/%s/" % config.start_cat
    # Probe exactly the file that is written below.  The previous probe path
    # contained a literal, never-substituted "%s/" component, so the cache
    # was never found and the sub-categories were re-fetched on every call.
    filepath = dirpath + "subcats.txt"

    if os.path.exists(filepath):
        return misc.read_file(filepath)

    subcats = [subcat for subcat in get_subcats(config.start_cat)
               if lang.can_subcat(subcat)]
    misc.write_file(filepath, subcats)
    return subcats
コード例 #9
0
def get_test_vi2cu_translator(file_name):
    """Read *file_name* and return its contents parsed as a Python literal.

    The text returned by read_file() is handed to ast.literal_eval(), which
    only accepts literal syntax and raises (e.g. ValueError or SyntaxError)
    for anything else.

    The former `if False:` debug dump was unreachable and has been removed,
    together with the placeholder dict that was overwritten immediately.
    """
    return ast.literal_eval(read_file(file_name))
コード例 #10
0
def parse_main(target='', detail=0):
    from parse_main.main import test

    if target != '':
        test_input = read_file(target)
        flag = test(test_input, test_set=False, detail=detail)
    else:
        path = 'parse_main/test_set/'
        tests = read_file(path + 'test_list')
        test_list = get_test_list(tests)

        for test_name in test_list:
            test_data = get_test_data(path + test_name,
                                      ['test_input', 'test_output'])

            print "TEST:", test_name
            flag = test(test_data, detail=detail)
            if flag == False:
                return False

    return True
コード例 #11
0
def get_test_vi2cu_translator(file_name):
	"""Read *file_name* and return its contents parsed as a Python literal.

	The text returned by read_file() is handed to ast.literal_eval(), which
	only accepts literal syntax and raises (e.g. ValueError or SyntaxError)
	for anything else.

	The former `if False:` debug dump was unreachable and has been removed,
	together with the placeholder dict that was overwritten immediately.
	"""
	return ast.literal_eval(read_file(file_name))
コード例 #12
0
def test_vi2cu_translator(target='', detail=0):
    from vi2cu_translator.main import test
    if target != '':
        test_input = read_file(target)
        flag = test(test_input, test_set=False, detail=detail)
    else:
        path = 'vi2cu_translator/test_set/'

        tests = read_file(path + 'test_list')
        test_list = get_test_list(tests)

        for test_name in test_list:
            test_data = get_test_data(
                path + test_name,
                ['test_input', 'test_output', 'dtype_dict', 'return_dtype'])

            print "TEST:", test_name
            flag = test(test_data)
            if flag == False:
                return False

    return True
コード例 #13
0
ファイル: run_test.py プロジェクト: hvcl-old/Vivaldi
def get_test(file_name):
    """Split a test file into its 'input' and 'output' parts.

    The input part is the text between the first 'test_input:' marker and
    the first 'test_output:' marker; the output part is everything after
    the 'test_output:' marker.  Both parts are passed through
    remove_first_endline() before being returned in a dict.
    """
    text = read_file(file_name)

    in_at = text.find('test_input:')
    out_at = text.find('test_output:')

    raw_input = text[in_at + len('test_input:'):out_at]
    raw_output = text[out_at + len('test_output:'):]

    # Strip the newline that follows each marker.
    return {
        'input': remove_first_endline(raw_input),
        'output': remove_first_endline(raw_output),
    }
コード例 #14
0
ファイル: run_test.py プロジェクト: hvcl/Vivaldi
def get_test(file_name):
	"""Split a test file into its 'input' and 'output' parts.

	The input part is the text between the first 'test_input:' marker and
	the first 'test_output:' marker; the output part is everything after
	the 'test_output:' marker.  Both parts are passed through
	remove_first_endline() before being returned in a dict.
	"""
	text = read_file(file_name)

	in_at = text.find('test_input:')
	out_at = text.find('test_output:')

	raw_input = text[in_at+len('test_input:'):out_at]
	raw_output = text[out_at+len('test_output:'):]

	# Strip the newline that follows each marker.
	return {
		'input': remove_first_endline(raw_input),
		'output': remove_first_endline(raw_output),
	}
コード例 #15
0
ファイル: run_test.py プロジェクト: hvcl-old/Vivaldi
def divide_line():
    from functions.divide_line.divide_line import test
    path = 'functions/divide_line/test_set/'

    tests = read_file(path + 'test_list')
    test_list = tests.split('\n')

    for test_name in test_list:
        if test_name == '': continue
        test_data = get_test(path + test_name)

        print "TEST:", test_name
        test(test_data['input'], test_data['output'])

    return False
コード例 #16
0
ファイル: run_test.py プロジェクト: hvcl/Vivaldi
def divide_line():
	from functions.divide_line.divide_line import test
	path = 'functions/divide_line/test_set/'
	
	tests = read_file(path+'test_list')
	test_list = tests.split('\n')

	for test_name in test_list:
		if test_name == '':continue
		test_data = get_test(path+test_name)

		print "TEST:", test_name
		test(test_data['input'], test_data['output'])
		
	return False
コード例 #17
0
ファイル: run_test.py プロジェクト: hvcl-old/Vivaldi
def code_to_line_list():
    from functions.code_to_line_list.code_to_line_list import test
    path = 'functions/code_to_line_list/test_set/'

    tests = read_file(path + 'test_list')
    test_list = tests.split('\n')

    for test_name in test_list:
        if test_name == '': continue
        test_data = get_test(path + test_name)

        test_output = test_data['output'].split('\n')

        print "TEST:", test_name
        test(test_data['input'], test_output)
    return False
コード例 #18
0
ファイル: run_test.py プロジェクト: hvcl/Vivaldi
def code_to_line_list():
	from functions.code_to_line_list.code_to_line_list import test
	path = 'functions/code_to_line_list/test_set/'

	tests = read_file(path+'test_list')
	test_list = tests.split('\n')
	
	for test_name in test_list:
		if test_name == '':continue
		test_data = get_test(path+test_name)

		test_output = test_data['output'].split('\n')
		
		print "TEST:", test_name
		test(test_data['input'], test_output)
	return False
コード例 #19
0
def divide_line(target='',detail=0):
	from general.divide_line.divide_line import test
	path = 'general/divide_line/test_set/'
	
	tests = read_file(path+'test_list')
	test_list = get_test_list(tests)

	if target != '': test_list = [target]
	for test_name in test_list:
		test_data = get_test_data(path+test_name,['test_input','test_output'])
		
		print "TEST:", test_name
		flag = test(test_data, detail=detail)
		if flag == False:
			return False
	return True
コード例 #20
0
ファイル: run_test.py プロジェクト: hvcl/Vivaldi
def split_into_block_and_code(target=''):
	from functions.split_into_block_and_code.split_into_block_and_code import test
	path = 'functions/split_into_block_and_code/test_set/'

	tests = read_file(path+'test_list')
	test_list = tests.split('\n')

	if target != '': test_list = [target]
	for test_name in test_list:
		if len(test_name) > 0 and test_name[0] == '#':continue
		if test_name == '':continue
		test_data = get_test_data(path+test_name, ['test_input','test_output'])

		print "TEST:", test_name
		test(test_data)
	return True
コード例 #21
0
ファイル: run_test.py プロジェクト: hvcl-old/Vivaldi
def get_test_data(file_name, data_list=None):
    """Read a test file and split it into named sections.

    Each entry of *data_list* is a marker searched for in the file text.  A
    section starts at the first occurrence of its marker: its name is the
    text from the marker up to the next ':' and its content runs from just
    after that ':' to the start of the next located marker (or to the end
    of the file for the last one).  In every content, tabs are replaced by
    four spaces and the result is passed through remove_space_after_line().

    Markers that do not occur in the file are skipped.  Previously their
    find() result of -1 was treated as a real offset, which produced a
    bogus extra section.  *data_list* defaults to no markers; None replaces
    the former mutable [] default.

    Returns a dict mapping section name to processed content.
    """
    text = read_file(file_name)

    # Offsets of the markers that are actually present, in file order.
    hits = (text.find(marker) for marker in (data_list or ()))
    starts = sorted(pos for pos in hits if pos != -1)

    # Every section ends where the following one starts; the last section
    # runs to the end of the text.
    ends = starts[1:] + [len(text)]

    test_data = {}
    for start, end in zip(starts, ends):
        colon = text.find(':', start + 1)
        name = text[start:colon]
        content = text[colon + 1:end].replace('\t', '    ')
        test_data[name] = remove_space_after_line(content)

    return test_data
コード例 #22
0
ファイル: run_test.py プロジェクト: hvcl-old/Vivaldi
def split_into_block_and_code(target=''):
    from functions.split_into_block_and_code.split_into_block_and_code import test
    path = 'functions/split_into_block_and_code/test_set/'

    tests = read_file(path + 'test_list')
    test_list = tests.split('\n')

    if target != '': test_list = [target]
    for test_name in test_list:
        if len(test_name) > 0 and test_name[0] == '#': continue
        if test_name == '': continue
        test_data = get_test_data(path + test_name,
                                  ['test_input', 'test_output'])

        print "TEST:", test_name
        test(test_data)
    return True
コード例 #23
0
def divide_line(target='', detail=0):
    from general.divide_line.divide_line import test
    path = 'general/divide_line/test_set/'

    tests = read_file(path + 'test_list')
    test_list = get_test_list(tests)

    if target != '': test_list = [target]
    for test_name in test_list:
        test_data = get_test_data(path + test_name,
                                  ['test_input', 'test_output'])

        print "TEST:", test_name
        flag = test(test_data, detail=detail)
        if flag == False:
            return False
    return True
コード例 #24
0
ファイル: run_test.py プロジェクト: hvcl/Vivaldi
def get_test_data(file_name, data_list=None):
	"""Read a test file and split it into named sections.

	Each entry of *data_list* is a marker searched for in the file text.  A
	section starts at the first occurrence of its marker: its name is the
	text from the marker up to the next ':' and its content runs from just
	after that ':' to the start of the next located marker (or to the end
	of the file for the last one).  In every content, tabs are replaced by
	four spaces and the result is passed through remove_space_after_line().

	Markers that do not occur in the file are skipped.  Previously their
	find() result of -1 was treated as a real offset, which produced a
	bogus extra section.  *data_list* defaults to no markers; None replaces
	the former mutable [] default.

	Returns a dict mapping section name to processed content.
	"""
	text = read_file(file_name)

	# Offsets of the markers that are actually present, in file order.
	hits = (text.find(marker) for marker in (data_list or ()))
	starts = sorted(pos for pos in hits if pos != -1)

	# Every section ends where the following one starts; the last section
	# runs to the end of the text.
	ends = starts[1:] + [len(text)]

	test_data = {}
	for start, end in zip(starts, ends):
		colon = text.find(':', start+1)
		name = text[start:colon]
		content = text[colon+1:end].replace('\t','    ')
		test_data[name] = remove_space_after_line(content)

	return test_data
コード例 #25
0
def create_node_interstage(definitions: dict, node: BlogPost) -> None:
    """Write the interstage file for *node*.

    The interstage is the post's markdown with references to other nodes
    mixed in; it is what later gets turned into html.  *definitions* maps a
    word to a (compiled regex, target node id) pair.  For every word other
    than the node's own name, each regex match with a non-empty group 1 is
    replaced by "[<group 1>]({{ post: <target node id> }})"; other matches
    are left as they were.  The result is written to node.interstage_path.
    """
    text = read_file(node.markdown_path)

    for word, (pattern, target_node_id) in definitions.items():
        # A post never links to itself.
        if word != node.name:

            def to_reference(match):
                # Only matches that captured the word become links; any
                # other match is put back unchanged, which avoids
                # overlapping replacements.
                if match.group(1):
                    return rf"[{match.group(1)}]({{{{ post: {target_node_id} }}}})"
                return match.group(0)

            text = pattern.sub(to_reference, text)

    write_file(node.interstage_path, text)
コード例 #26
0
def spellcheck() -> None:
    """Report words in every post that neither spell checker knows.

    Each line of each post's markdown is split into words (hyphens count as
    separators, every character other than an ASCII letter is removed and
    empty results are dropped).  A word is reported, with the post name and
    1-based line number, when both the "en" and the "de" SpellChecker list
    it as unknown.  done() is called at the end.
    """
    english = SpellChecker(language="en")
    german = SpellChecker(language="de")

    for post in db.query(BlogPost):
        lines = read_file(post.markdown_path).splitlines()
        for line_index, line in enumerate(lines):
            words = []
            for token in line.replace("-", " ").split():
                cleaned = re.sub(r"[^a-zA-Z ]", "", token)
                if cleaned:
                    words.append(cleaned)

            not_english = english.unknown(words)
            not_german = german.unknown(words)

            # Only words unknown in both languages are reported.
            for unknown_word in not_english:
                if unknown_word in not_german:
                    print(f"In \"{post.name}\" (line {line_index + 1}): Unknown word \"{unknown_word}\".")

    done()
コード例 #27
0
ファイル: crawler.py プロジェクト: Leeyp/WiktionaryCrawler
def crawl_pages(subcats):
	"""Collect the pages of every sub-category, using on-disk caches.

	For each entry of *subcats* a directory is created below
	"data/site/<start_cat>/".  If that directory already holds a
	"pages.txt" it is read, otherwise the pages come from
	get_subcat_pages() and are written to that file.  Progress is reported
	through pb.update().

	Returns the keys of an OrderedDict holding, in first-seen order and
	without duplicates, the pages accepted by lang.can_page().
	"""
	root = "data/site/%s/" % config.start_cat
	collected = []

	for position, subcat in enumerate(subcats, 1):
		pb.update(position, len(subcats))

		target_dir = root + subcat + "/"
		misc.mkdir_p(target_dir)

		cache_file = target_dir + "pages.txt"
		if not os.path.exists(cache_file):
			found = get_subcat_pages(subcat)
			misc.write_file(cache_file, found)
		else:
			found = misc.read_file(cache_file)

		collected += found

	# An ordered mapping keeps the first occurrence of each accepted page.
	unique = OrderedDict()
	for page in collected:
		if lang.can_page(page):
			unique[page] = None
	return unique.keys()
コード例 #28
0
        )
        print(f"回测图像输出到{gdir}")
        # return res_pac, gdir
        return wrapper_df
    else:
        # return res_pac
        return wrapper_df


# Script entry point: run one backtest per code read from constant.CODE_FILE,
# over the date range constant.BEGIN_DATE .. constant.END_DATE.
if __name__ == "__main__":
    # NOTE(review): presumably these (re)initialise the settings and the
    # output directories used below -- confirm in the `constant` module.
    constant.reset_params()
    constant.check_dir()
    start_time = constant.BEGIN_DATE
    end_time = constant.END_DATE
    # Read in the codes to backtest
    codes = read_file(constant.CODE_FILE)
    # Create the global backtest object (currently disabled)
    # global_backtest_obj = GlobalBacktest(start_time=start_time,
    #                                      end_time=end_time)
    # global_backtest_obj.run_backtest(global_index="000300.SH",
    #                                  func=runbacktest,
    #                                  key_params=dict(begin=start_time, end=end_time,
    #                                                  dir=constant.GRAPH))
    # Backtest every code individually; the return value is not used here.
    for code in codes:
        runbacktest(
            begin=start_time,
            end=end_time,
            dir=constant.GRAPH,
            codename=code,
        )
コード例 #29
0
def convert_markdown_for_post(post: BlogPost) -> None:
    """Render the interstage markdown of *post* to html.

    The text at post.interstage_path is run through pre_process_markdown(),
    converted with markdown.markdown() using the "sane_lists", "md_in_html"
    and "extra" extensions, and written to post.html_path.
    """
    prepared = pre_process_markdown(read_file(post.interstage_path), post)
    rendered = markdown.markdown(
        prepared, extensions=["sane_lists", "md_in_html", "extra"])
    write_file(post.html_path, rendered)