예제 #1
0
    def stringToCitation(self, ref):
        """Parse a punctuation-delimited reference string into citation fields.

        Everything up to and including the first '.' is discarded (presumably
        the reference's list number -- confirm against callers).  The rest is
        split on ',', '.', '(' and ')'.  Tokens before the first all-digit
        token are treated as author names, the all-digit token is the year,
        and the token immediately after it is the title.

        Args:
            ref: Raw reference text.

        Returns:
            A dict with keys 'authors' (list of str, trailing initials moved
            in front of the rest of the name), 'title' (str) and 'year' (the
            digit string exactly as it appeared), or None when no year or no
            title could be extracted.
        """
        ref = ref[ref.find('.') + 1:]
        ref = re.split('[,.()]', ref)

        author_arr = []
        title = ''
        year = 0

        for idx, element in enumerate(ref):
            if element.isdigit():
                year = element
                # The year can be the final token, in which case there is no
                # title token to read; leave the title empty so the method
                # returns None instead of raising IndexError.
                if idx + 1 < len(ref):
                    raw_title = ref[idx + 1].lower()
                    raw_title = raw_title.replace('‚', '').replace('"', '')
                    title = WordInference.inferSpaces(raw_title)
                break

            if element.endswith('etal'):
                element = element[:-4]

            # Consecutive delimiters (or a bare "etal") leave blank tokens;
            # those are not authors.
            if not element.strip():
                continue

            # Walk backwards over the trailing run of upper-case characters
            # (the initials), looking at no more than three of them.
            i = -1
            while element[i:].isupper():
                i -= 1
                if i < -3:
                    break

            # Put the initials first, then the remainder of the name.
            author_arr.append(element[i + 1:] + ' ' + element[:i + 1])

        if year == 0 or title == '':
            return None

        infoDict = {
            'authors': author_arr,
            'title': title.strip(),
            'year': year,
        }
        return infoDict
    def stringToCitation(self, ref):
        """Extract authors, title and year from a delimiter-separated reference.

        The text up to and including the first '.' is dropped (presumably a
        leading reference number -- confirm against callers) and the remainder
        is split on ',', '.', '(' and ')'.  Every token ahead of the first
        all-digit token is taken as an author; the all-digit token is the
        year and the token right after it is the title.

        Args:
            ref: Raw reference text.

        Returns:
            None if either the year or the title is missing; otherwise a dict
            with 'authors' (list of str with trailing initials moved to the
            front), 'title' (str) and 'year' (the digit string as found).
        """
        ref = ref[ref.find('.') + 1:]
        ref = re.split('[,.()]', ref)

        author_arr = []
        title = ''
        year = 0

        for idx, element in enumerate(ref):
            if element.isdigit():
                year = element
                # Only read a title when a token actually follows the year;
                # indexing past the end used to raise IndexError.
                if idx + 1 < len(ref):
                    raw_title = ref[idx + 1].lower()
                    raw_title = raw_title.replace('‚', '').replace('"', '')
                    title = WordInference.inferSpaces(raw_title)
                break

            if element.endswith('etal'):
                element = element[:-4]

            # Blank tokens come from adjacent delimiters and carry no name.
            if not element.strip():
                continue

            # Find where the trailing upper-case initials start, checking at
            # most the last three characters.
            i = -1
            while element[i:].isupper():
                i -= 1
                if i < -3:
                    break

            # Initials first, rest of the name second.
            author_arr.append(element[i + 1:] + ' ' + element[:i + 1])

        if year == 0 or title == '':
            return None

        infoDict = {'authors': author_arr, 'title': title.strip(),
                    'year': year}
        return infoDict
    def stringToCitation(self, citation, max_year=2016):
        """Parse a comma-separated citation string into authors, title, year.

        Author elements are recognised by an upper-case initial followed by
        '.'.  When an 'and' directly precedes such an initial it marks the
        last author.  The element following the authors is the title, and
        once a further element is seen the year is searched for from the end
        of the citation backwards.

        Args:
            citation: Raw citation text.
            max_year: Largest number accepted as a publication year.  The
                default matches the previously hard-coded limit.

        Returns:
            None when the citation has fewer than three comma-separated
            elements; otherwise a dict with 'authors' (list of str), 'title'
            (str) and 'year' (int, 0 when no plausible year was found).
        """
        author_and_index = citation.find('and')
        multiple_authors = False
        # Slicing (rather than indexing) keeps a missing 'and' or a short
        # string from raising, and a find() result of -1 from inspecting
        # unrelated characters near the start of the citation.
        after_and = citation[author_and_index + 3:author_and_index + 5]
        if (author_and_index != -1 and len(after_and) == 2
                and after_and[0].isupper() and after_and[1] == '.'):
            # this indicates where the last listed author in the reference is
            citation = citation.replace('and', 'LAST_AUTHOR', 1)
            multiple_authors = True

        citation = citation.split(',')
        if len(citation) < 3:
            return None

        authorArray = []
        title = ''
        year = 0

        authors_just_done = False
        title_done = False
        for element in citation:
            if element == "":
                continue
            # the last author, following the 'and'
            if 'LAST_AUTHOR' in element:
                for author in element.split('LAST_AUTHOR'):
                    if author != '':
                        authorArray.append(' '.join(author.split('.')))
                authors_just_done = True
            # all authors before the last one; never after the title, so an
            # abbreviation such as "J.Chem." is not mistaken for an author
            elif (not title_done and not authors_just_done
                  and element[0].isupper() and element[1:2] == '.'):
                authorArray.append(' '.join(element.split('.')))
                if not multiple_authors:
                    authors_just_done = True
            # title comes right after the authors
            elif authors_just_done:
                title = element.replace("‚", '').replace('"', '')
                title = WordInference.inferSpaces(title.lower())
                authors_just_done = False
                title_done = True
            # year: scan from the end for the first plausible 4-digit number
            elif title_done:
                for chunk in reversed(citation):
                    digits = re.sub('[^0-9]', '', chunk)
                    if not digits:
                        continue
                    candidate = int(digits)
                    # Assign only on success so a rejected number never
                    # leaks out as the year.
                    if 1000 <= candidate <= max_year:
                        year = candidate
                        break
                break

        # Debug output kept as-is; existing callers may rely on it.
        print('title: ' + title)
        infoDict = {'authors': authorArray, 'title': title.strip(),
                    'year': year}
        return infoDict
예제 #4
0
    def stringToCitation(self, citation, max_year=2016):
        """Split a comma-separated citation into authors, title and year.

        An element counts as an author when it begins with an upper-case
        initial followed by '.'.  An 'and' that is immediately followed by
        such an initial marks the final author.  The element after the
        authors is the title; when another element follows, the year is
        looked up by scanning the citation from its end.

        Args:
            citation: Raw citation text.
            max_year: Upper bound for a number to be accepted as the year.
                Defaults to the limit that used to be hard-coded.

        Returns:
            None for citations with fewer than three comma-separated
            elements; otherwise a dict with 'authors' (list of str), 'title'
            (str) and 'year' (int, 0 if no plausible year was found).
        """
        author_and_index = citation.find('and')
        multiple_authors = False
        # Use a slice so that neither a missing 'and' (find() == -1) nor a
        # citation ending right after 'and' can raise or match stray text.
        after_and = citation[author_and_index + 3:author_and_index + 5]
        if (author_and_index != -1 and len(after_and) == 2
                and after_and[0].isupper() and after_and[1] == '.'):
            # this indicates where the last listed author in the reference is
            citation = citation.replace('and', 'LAST_AUTHOR', 1)
            multiple_authors = True

        citation = citation.split(',')
        if len(citation) < 3:
            return None

        authorArray = []
        title = ''
        year = 0

        authors_just_done = False
        title_done = False
        for element in citation:
            if element == "":
                continue
            # for the last author following the 'and'
            if 'LAST_AUTHOR' in element:
                for author in element.split('LAST_AUTHOR'):
                    if author != '':
                        authorArray.append(' '.join(author.split('.')))
                authors_just_done = True
            # for all authors before the last one; skipped once the title is
            # parsed so later "X." abbreviations are not taken as authors
            elif (not title_done and not authors_just_done
                  and element[0].isupper() and element[1:2] == '.'):
                authorArray.append(' '.join(element.split('.')))
                if not multiple_authors:
                    authors_just_done = True
            # title after authors are finished
            elif authors_just_done:
                title = element.replace("‚", '').replace('"', '')
                title = WordInference.inferSpaces(title.lower())
                authors_just_done = False
                title_done = True
            # year after title is finished: search backwards for the first
            # number in the accepted 4-digit range
            elif title_done:
                for chunk in reversed(citation):
                    digits = re.sub('[^0-9]', '', chunk)
                    if not digits:
                        continue
                    candidate = int(digits)
                    # Commit only a validated value; rejected numbers must
                    # not end up in the result.
                    if 1000 <= candidate <= max_year:
                        year = candidate
                        break
                break

        # Debug output kept as-is; existing callers may rely on it.
        print('title: ' + title)
        infoDict = {
            'authors': authorArray,
            'title': title.strip(),
            'year': year,
        }
        return infoDict