def stringToCitation(self, ref):
    """Parse a space-stripped reference string into a citation dict.

    Everything up to and including the first '.' is discarded (presumably
    a leading reference number such as "12." -- confirm against callers).
    The remainder is split on ',', '.', '(' and ')'. Tokens before the
    first all-digit token are treated as authors; the all-digit token is
    the year and the token right after it is the raw title.

    Args:
        ref: The reference text. Author tokens are expected to look like
            "SmithJ" (surname followed by up to three capital initials),
            optionally ending in "etal".

    Returns:
        A dict with keys 'authors' (list of "INITIALS Surname" strings),
        'title' (output of WordInference.inferSpaces, stripped) and
        'year' (the digit token, kept as a string), or None when no year
        or no title could be found.
    """
    tokens = re.split('[,.()]', ref[ref.find('.') + 1:])
    author_arr = []
    title = ''
    year = 0
    for idx, element in enumerate(tokens):
        if element.isdigit():
            year = element
            # A year that is the very last token has no title after it;
            # leave the title empty so the function returns None instead
            # of raising IndexError.
            if idx + 1 < len(tokens):
                raw_title = tokens[idx + 1].lower()
                raw_title = raw_title.replace('‚', '').replace('"', '')
                title = WordInference.inferSpaces(raw_title)
            break

        if element[-4:] == 'etal':
            element = element[:-4]
        # Adjacent delimiters (e.g. ".(") and a bare "etal" leave an empty
        # token; it carries no author information, so skip it.
        if not element:
            continue

        # Walk back over the trailing capitals (the initials), looking at
        # no more than the last three characters.
        i = -1
        while element[i:].isupper():
            i -= 1
            if i < -3:
                break
        # Move the initials in front of the surname: "SmithJ" -> "J Smith".
        author_arr.append(element[i + 1:] + ' ' + element[:i + 1])

    if year == 0 or title == '':
        return None
    return {
        'authors': author_arr,
        'title': title.strip(),
        'year': year
    }
def stringToCitation(self, ref):
    """Parse a space-stripped reference string into a citation dict.

    Everything up to and including the first '.' is discarded (presumably
    a leading reference number such as "12." -- confirm against callers).
    The remainder is split on ',', '.', '(' and ')'. Tokens before the
    first all-digit token are treated as authors; the all-digit token is
    the year and the token right after it is the raw title.

    Args:
        ref: The reference text. Author tokens are expected to look like
            "SmithJ" (surname followed by up to three capital initials),
            optionally ending in "etal".

    Returns:
        A dict with keys 'authors' (list of "INITIALS Surname" strings),
        'title' (output of WordInference.inferSpaces, stripped) and
        'year' (the digit token, kept as a string), or None when no year
        or no title could be found.
    """
    tokens = re.split('[,.()]', ref[ref.find('.') + 1:])
    author_arr = []
    title = ''
    year = 0
    for idx, element in enumerate(tokens):
        if element.isdigit():
            year = element
            # A year that is the very last token has no title after it;
            # leave the title empty so the function returns None instead
            # of raising IndexError.
            if idx + 1 < len(tokens):
                raw_title = tokens[idx + 1].lower()
                raw_title = raw_title.replace('‚', '').replace('"', '')
                title = WordInference.inferSpaces(raw_title)
            break

        if element[-4:] == 'etal':
            element = element[:-4]
        # Adjacent delimiters (e.g. ".(") and a bare "etal" leave an empty
        # token; it carries no author information, so skip it.
        if not element:
            continue

        # Walk back over the trailing capitals (the initials), looking at
        # no more than the last three characters.
        i = -1
        while element[i:].isupper():
            i -= 1
            if i < -3:
                break
        # Move the initials in front of the surname: "SmithJ" -> "J Smith".
        author_arr.append(element[i + 1:] + ' ' + element[:i + 1])

    if year == 0 or title == '':
        return None
    return {'authors': author_arr, 'title': title.strip(), 'year': year}
def stringToCitation(self, citation, max_year=2016):
    """Parse a comma-separated, space-stripped citation into a dict.

    The fields are consumed in order: author fields (each starting with
    an initial such as "J."), then one title field, then a year that is
    searched for from the end of the citation.

    Args:
        citation: The citation text, e.g.
            "J.Smith,andA.Jones,Sometitle,Journal,2010".
        max_year: Largest value accepted as a publication year. Defaults
            to 2016, the limit this parser has always applied.

    Returns:
        None when the citation has fewer than three comma-separated
        fields. Otherwise a dict with keys 'authors' (list of strings
        with '.' replaced by ' '), 'title' (output of
        WordInference.inferSpaces, stripped; '' if no title was reached)
        and 'year' (int; 0 if no four-digit year <= max_year was found).
    """
    # Mark the "and" that introduces the last author. Only an "and" that
    # is directly followed by an initial ("X.") qualifies, so an "and"
    # inside a surname (e.g. "Sandberg") is skipped rather than ending the
    # search, and a citation without any "and" is left untouched.
    multiple_authors = False
    and_index = citation.find('and')
    while and_index != -1:
        follower = citation[and_index + 3:and_index + 5]
        if len(follower) == 2 and follower[0].isupper() and follower[1] == '.':
            citation = (citation[:and_index] + 'LAST_AUTHOR'
                        + citation[and_index + 3:])
            multiple_authors = True
            break
        and_index = citation.find('and', and_index + 1)

    fields = citation.split(',')
    if len(fields) < 3:
        return None

    authorArray = []
    title = ''
    year = 0
    authors_just_done = False
    title_done = False
    for element in fields:
        if element == "":
            continue

        if 'LAST_AUTHOR' in element:
            # Field holding the marker: the text on either side of it is
            # an author (the left side is empty for ",andA.Jones").
            for author in element.split('LAST_AUTHOR'):
                if author != '':
                    authorArray.append(' '.join(author.split('.')))
            authors_just_done = True
        elif (not authors_just_done and not title_done
              and len(element) > 1
              and element[0].isupper() and element[1] == '.'):
            # Author before the last one. The title_done guard keeps
            # later fields that merely look like initials (for example an
            # abbreviated journal name) from being read as authors.
            authorArray.append(' '.join(element.split('.')))
            if not multiple_authors:
                authors_just_done = True
        elif authors_just_done:
            # First field after the authors is the title.
            title = element.replace("‚", '').replace('"', '')
            title = WordInference.inferSpaces(title.lower())
            authors_just_done = False
            title_done = True
        elif title_done:
            # Everything after the title: take the year from the last
            # field whose digits form a plausible four-digit year. The
            # result is only stored once validated, so a rejected number
            # (page range, volume) can no longer leak out as the year.
            for field in reversed(fields):
                digits = re.sub('[^0-9]', '', field)
                if not digits:
                    continue
                candidate = int(digits)
                if 1000 <= candidate <= max_year:
                    year = candidate
                    break
            break

    print('title: ' + title)
    return {'authors': authorArray, 'title': title.strip(), 'year': year}
def stringToCitation(self, citation, max_year=2016):
    """Parse a comma-separated, space-stripped citation into a dict.

    The fields are consumed in order: author fields (each starting with
    an initial such as "J."), then one title field, then a year that is
    searched for from the end of the citation.

    Args:
        citation: The citation text, e.g.
            "J.Smith,andA.Jones,Sometitle,Journal,2010".
        max_year: Largest value accepted as a publication year. Defaults
            to 2016, the limit this parser has always applied.

    Returns:
        None when the citation has fewer than three comma-separated
        fields. Otherwise a dict with keys 'authors' (list of strings
        with '.' replaced by ' '), 'title' (output of
        WordInference.inferSpaces, stripped; '' if no title was reached)
        and 'year' (int; 0 if no four-digit year <= max_year was found).
    """
    # Mark the "and" that introduces the last author. Only an "and" that
    # is directly followed by an initial ("X.") qualifies, so an "and"
    # inside a surname (e.g. "Sandberg") is skipped rather than ending the
    # search, and a citation without any "and" is left untouched.
    multiple_authors = False
    and_index = citation.find('and')
    while and_index != -1:
        follower = citation[and_index + 3:and_index + 5]
        if len(follower) == 2 and follower[0].isupper() and follower[1] == '.':
            citation = (citation[:and_index] + 'LAST_AUTHOR'
                        + citation[and_index + 3:])
            multiple_authors = True
            break
        and_index = citation.find('and', and_index + 1)

    fields = citation.split(',')
    if len(fields) < 3:
        return None

    authorArray = []
    title = ''
    year = 0
    authors_just_done = False
    title_done = False
    for element in fields:
        if element == "":
            continue

        if 'LAST_AUTHOR' in element:
            # Field holding the marker: the text on either side of it is
            # an author (the left side is empty for ",andA.Jones").
            for author in element.split('LAST_AUTHOR'):
                if author != '':
                    authorArray.append(' '.join(author.split('.')))
            authors_just_done = True
        elif (not authors_just_done and not title_done
              and len(element) > 1
              and element[0].isupper() and element[1] == '.'):
            # Author before the last one. The title_done guard keeps
            # later fields that merely look like initials (for example an
            # abbreviated journal name) from being read as authors.
            authorArray.append(' '.join(element.split('.')))
            if not multiple_authors:
                authors_just_done = True
        elif authors_just_done:
            # First field after the authors is the title.
            title = element.replace("‚", '').replace('"', '')
            title = WordInference.inferSpaces(title.lower())
            authors_just_done = False
            title_done = True
        elif title_done:
            # Everything after the title: take the year from the last
            # field whose digits form a plausible four-digit year. The
            # result is only stored once validated, so a rejected number
            # (page range, volume) can no longer leak out as the year.
            for field in reversed(fields):
                digits = re.sub('[^0-9]', '', field)
                if not digits:
                    continue
                candidate = int(digits)
                if 1000 <= candidate <= max_year:
                    year = candidate
                    break
            break

    print('title: ' + title)
    return {
        'authors': authorArray,
        'title': title.strip(),
        'year': year
    }