コード例 #1
0
ファイル: loader.py プロジェクト: fiddenmar/pybico
	def __load_txt(self):
		rn1 = r"(?P<authors>((\pL\. ?(\pL\. )?\pL+,? )|(\pL+ \pL\. ?(\pL\.)?,? )" #regular for authors
		rn2 = r"|(\p{Lu}\p{Ll}+ \p{Lu}\p{Ll}+,? )"
		rn3 = r")+)"
		ra_ru = r"(?P<article>\p{Lu}\p{Ll}+ \p{Ll}+.*?) *\/\/ *" #regular for article
		ra_eng = r"(?P<article>\p{Lu}.*?) *\/\/ *" #regular for article
		rj = r'(?P<source>[ \pL"“”]+)' #regular for source
		rm = r"(?P<misc>.+)" #regular for misc
		reg_ru = re.compile(rn1+rn2+rn3+ra_ru+rj+rm, re.UNICODE)
		reg_eng = re.compile(rn1+rn3+ra_eng+rj+rm, re.UNICODE)
		data = []
		f = open(self.filename, 'r')
		content = f.read()
		items = content.split('\n')
		for item in items:
			res = None
			if isEnglish(item[:15]):
				res = reg_eng.match(item.strip())
			else:
				res = reg_ru.match(item.strip())
			if res != None:
				publication = Publication()
				publication.authors = Author.parseAuthors(res.group("authors"))


				data.append({"authors": split_authors(res.group("authors")), "article": res.group("article"), "source": res.group("source"), "misc": res.group("misc")})
			else:
				print("Wrong line: " + item)
		return data