Python remove_punctuationの例、library.synapse.util.remove_punctuation Pythonの例

コード例 #1

0

ファイルを表示

ファイル: models.py プロジェクト: gapson/synapse-bibliography

 def save(self, force_insert=False, force_update=False):
     """Refresh the derived fields, stamp ``mod_date`` and save.

     Each empty ``stripped_*`` field is filled by running
     ``remove_punctuation`` over its punctuated counterpart; fields that
     already hold a value are left untouched.  ``mod_date`` is set to the
     current local time on every call before delegating to the parent
     class ``save``.
     """
     derived_fields = (
         ('stripped_title', 'title'),
         ('stripped_author_names', 'author_names'),
     )
     for stripped_name, source_name in derived_fields:
         if getattr(self, stripped_name):
             continue
         setattr(self, stripped_name,
                 remove_punctuation(getattr(self, source_name)))
     self.mod_date = datetime.datetime.now()
     super(Document, self).save(force_insert, force_update)

コード例 #2

0

ファイルを表示

 def save(self, force_insert=False, force_update=False):
     """Refresh the derived fields, stamp ``mod_date`` and save.

     Each empty ``stripped_*`` field is filled by running
     ``remove_punctuation`` over its punctuated counterpart; fields that
     already hold a value are left untouched.  ``mod_date`` is set to the
     current local time on every call before delegating to the parent
     class ``save``.
     """
     derived_fields = (
         ('stripped_title', 'title'),
         ('stripped_author_names', 'author_names'),
     )
     for stripped_name, source_name in derived_fields:
         if getattr(self, stripped_name):
             continue
         setattr(self, stripped_name,
                 remove_punctuation(getattr(self, source_name)))
     self.mod_date = datetime.datetime.now()
     super(Document, self).save(force_insert, force_update)

コード例 #3

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def get_first_author(record):
    """Return the de-punctuated first token of ``record.authors``.

    The token is everything before the first space.  Trailing whitespace
    and then trailing commas are removed from it before it is handed to
    ``remove_punctuation``.  Returns ``None`` when ``record.authors`` is
    empty or ``None``.
    """
    if not record.authors:
        return None
    leading_token = record.authors.partition(' ')[0]
    trimmed = leading_token.rstrip().rstrip(',')
    return remove_punctuation(trimmed)

コード例 #4

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def get_first_author(record):
    """Return the de-punctuated first token of ``record.authors``.

    The token is everything before the first space.  Trailing whitespace
    and then trailing commas are removed from it before it is handed to
    ``remove_punctuation``.  Returns ``None`` when ``record.authors`` is
    empty or ``None``.
    """
    if not record.authors:
        return None
    leading_token = record.authors.partition(' ')[0]
    trimmed = leading_token.rstrip().rstrip(',')
    return remove_punctuation(trimmed)

コード例 #5

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def _mskcc_institution_for(address):
    """Return the MSKCC Institution when ``address`` mentions it, else None.

    A missing or empty address never matches, so callers may pass
    ``None`` safely.  The Institution row is only queried on a match.
    """
    markers = ('MSKCC', 'Sloan-Kettering', '1275 York', 'Sloan Kettering')
    if address and any(marker in address for marker in markers):
        return Institution.objects.get(
            name='Memorial Sloan-Kettering Cancer Center')
    return None


def _split_author_names(record):
    """Split the record's author (or book author) string into stripped names."""
    if record.authors and ';' in record.authors:
        names = record.authors.split(';')
    elif record.authors and ',' in record.authors:
        names = record.authors.split(',')
        # A two-part split whose second part is a single character is
        # treated as one author written with a comma, not two authors.
        if len(names) == 2 and len(names[1]) == 1:
            names = [record.authors]
    elif record.book_authors and ';' in record.book_authors:
        names = record.book_authors.split(';')
    elif record.book_authors and ',' in record.book_authors:
        names = record.book_authors.split(',')
    else:
        names = [record.authors]
    # A record without any author string yields no names rather than
    # failing on None.strip().
    return [name.strip() for name in names if name is not None]


def _author_index(authors, author_name):
    """Return the position of ``author_name`` in ``authors``, or None.

    An exact match is tried first, then a match on the de-punctuated
    forms of both sides.
    """
    try:
        return authors.index(author_name)
    except ValueError:  # names don't match exactly
        pass
    stripped_authors = [remove_punctuation(name) for name in authors]
    try:
        return stripped_authors.index(remove_punctuation(author_name))
    except ValueError:
        return None


def create_publications(record, document, nameorder_significant=False):
    """Create a Publication per author of ``record`` and attach affiliations.

    Args:
        record: source record; its ``authors``, ``book_authors``,
            ``affiliations`` and ``correspondence_address`` attributes
            are read.
        document: the Document every created Publication points at.
        nameorder_significant: when true, a 'Sole' or 'First' author is
            given the record's whole affiliation string (step 2).

    Returns:
        None.  Publications are fetched/created and saved through the
        ORM, and ``create_author`` is called once per publication.
    """
    # step 1:  create a publication record for each author
    authors = _split_author_names(record)
    publications = []
    for name in authors:
        # Position is decided by comparing name values, so a name that
        # appears at both ends of the list falls through to 'Contributor'.
        if len(authors) == 1:
            order = 'Sole'
        elif name == authors[0] and name != authors[-1]:
            order = 'First'
        elif name == authors[-1] and name != authors[0]:
            order = 'Last'
        else:
            order = 'Contributor'
        nameorder, _no_created = NameOrder.objects.get_or_create(order=order)

        publication, _pub_created = Publication.objects.get_or_create(
            author_name=name, name_order=nameorder, document=document)
        publications.append(publication)

        # step 2: handle significant nameorder for CINAHL records
        if nameorder_significant and nameorder and nameorder.order in ('Sole', 'First'):
            publication.affiliation = record.affiliations
            institution = _mskcc_institution_for(record.affiliations)
            if institution is not None:
                publication.institution = institution
            publication.save()

    # step 3: handle Correspondence Address, to catch anything missed earlier
    correspondence = record.correspondence_address
    if correspondence:
        for pub in publications:
            if pub.author_name not in correspondence:
                continue
            modified = False
            if not pub.affiliation:
                pub.affiliation = correspondence
                modified = True
            if not pub.institution:
                institution = _mskcc_institution_for(correspondence)
                if institution is not None:
                    pub.institution = institution
                    modified = True
            if modified:
                pub.save()

    # step 4: per-author affiliations, then hand each publication to
    # create_author (presumably links the employee -- confirm there).
    # The split is loop-invariant, so it is done once up front.
    affiliations = record.affiliations.split(';') if record.affiliations else []
    one_affiliation_per_author = len(affiliations) == len(authors)
    for pub in publications:
        if affiliations:
            # handle PSYCINFO affiliations
            for affiliation in affiliations:
                if pub.author_name in affiliation:
                    pub.affiliation = affiliation
                    institution = _mskcc_institution_for(affiliation)
                    if institution is not None:
                        pub.institution = institution
                    pub.save()
            # handle SCOPUS affiliations
            if one_affiliation_per_author:
                name_index = _author_index(authors, pub.author_name)
                # Compare against None: index 0 (the first author) is a
                # valid position and must not be skipped.
                if name_index is not None:
                    pub.affiliation = affiliations[name_index]
                    institution = _mskcc_institution_for(pub.affiliation)
                    if institution is not None:
                        pub.institution = institution
                    pub.save()
        create_author(pub, pub.author_name)

コード例 #6

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def _find_existing_document(record):
    """Return the single stored Document matching ``record``, or None.

    Matching is attempted in order: exact DOI, then first author plus
    first page, then first author plus de-punctuated title.  A lookup
    only counts when it yields exactly one Document.
    """
    if record.doi:
        matches = Document.objects.filter(doi__exact=record.doi)
        if len(matches) == 1:
            return matches[0]
    if not record.authors:
        return None

    # Both remaining comparison points use the de-punctuated first author.
    first_author = get_first_author(record)
    if record.pages:
        # here's the first de-punctuated comparison point
        first_page = record.pages.split('-')[0].strip()
        if first_page:
            matches = Document.objects.filter(page_range__isnull=False).filter(
                stripped_author_names__istartswith=first_author,
                page_range__istartswith=first_page)
            if len(matches) == 1:
                return matches[0]
    if record.title:
        # here's the second de-punctuated comparison point
        stripped_title = remove_punctuation(record.title)
        matches = Document.objects.filter(
            stripped_author_names__istartswith=first_author,
            stripped_title__iexact=stripped_title)
        if len(matches) == 1:
            return matches[0]
    return None


def create_document(record, source):
    """Find or create the Document for ``record`` and fill its empty fields.

    Args:
        record: source record whose attributes (doi, title, authors,
            pages, ...) are matched against and copied onto the Document.
        source: value assigned to ``document.source`` when the document
            has none and ``record.source`` is set.

    Returns:
        The matched or newly created Document.  It is saved again only
        when at least one empty field was filled in.
    """
    document = _find_existing_document(record)
    if not document:
        document = Document.objects.create(title=record.title,
                                           author_names=record.authors,
                                           doi=record.doi)

    # (record attribute, document attribute, converter or None).  A rule
    # fires when the record value is truthy and the document field is
    # empty; existing document values are never overwritten.
    fill_rules = (
        ('doi', 'doi', None),
        ('title', 'title', None),
        ('authors', 'author_names', None),
        # here's where we set the de-punctuated author
        ('authors', 'stripped_author_names', remove_punctuation),
        # NOTE(review): the test reads record.source but the ``source``
        # argument is what gets stored -- confirm this is intended.
        ('source', 'source', lambda _value: source),
        ('abstract', 'abstract', None),
        ('volume', 'volume', None),
        ('issue', 'issue', None),
        ('pages', 'page_range', None),
        ('language', 'language', None),
        ('publish_year', 'publish_year', None),
        ('publish_date', 'publish_date', None),
        ('document_type', 'document_type', map_document_type),
        ('document_subtype', 'document_subtype', None),
        ('dmt', 'dmt',
         lambda dmt_id: DiseaseManagementTeam.objects.get(id=dmt_id)),
        ('affiliations', 'affiliations', None),
    )
    modified = False
    for record_attr, document_attr, convert in fill_rules:
        value = getattr(record, record_attr)
        if value and not getattr(document, document_attr):
            setattr(document, document_attr,
                    convert(value) if convert else value)
            modified = True

    # Derived from the document's own title, which may have just been set.
    if document.title and not document.stripped_title:
        document.stripped_title = remove_punctuation(document.title)
        modified = True

    if modified:
        document.save()
    return document

コード例 #7

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def _mskcc_institution_for(address):
    """Return the MSKCC Institution when ``address`` mentions it, else None.

    A missing or empty address never matches, so callers may pass
    ``None`` safely.  The Institution row is only queried on a match.
    """
    markers = ('MSKCC', 'Sloan-Kettering', '1275 York', 'Sloan Kettering')
    if address and any(marker in address for marker in markers):
        return Institution.objects.get(
            name='Memorial Sloan-Kettering Cancer Center')
    return None


def _split_author_names(record):
    """Split the record's author (or book author) string into stripped names."""
    if record.authors and ';' in record.authors:
        names = record.authors.split(';')
    elif record.authors and ',' in record.authors:
        names = record.authors.split(',')
        # A two-part split whose second part is a single character is
        # treated as one author written with a comma, not two authors.
        if len(names) == 2 and len(names[1]) == 1:
            names = [record.authors]
    elif record.book_authors and ';' in record.book_authors:
        names = record.book_authors.split(';')
    elif record.book_authors and ',' in record.book_authors:
        names = record.book_authors.split(',')
    else:
        names = [record.authors]
    # A record without any author string yields no names rather than
    # failing on None.strip().
    return [name.strip() for name in names if name is not None]


def _author_index(authors, author_name):
    """Return the position of ``author_name`` in ``authors``, or None.

    An exact match is tried first, then a match on the de-punctuated
    forms of both sides.
    """
    try:
        return authors.index(author_name)
    except ValueError:  # names don't match exactly
        pass
    stripped_authors = [remove_punctuation(name) for name in authors]
    try:
        return stripped_authors.index(remove_punctuation(author_name))
    except ValueError:
        return None


def create_publications(record, document, nameorder_significant=False):
    """Create a Publication per author of ``record`` and attach affiliations.

    Args:
        record: source record; its ``authors``, ``book_authors``,
            ``affiliations`` and ``correspondence_address`` attributes
            are read.
        document: the Document every created Publication points at.
        nameorder_significant: when true, a 'Sole' or 'First' author is
            given the record's whole affiliation string (step 2).

    Returns:
        None.  Publications are fetched/created and saved through the
        ORM, and ``create_author`` is called once per publication.
    """
    # step 1:  create a publication record for each author
    authors = _split_author_names(record)
    publications = []
    for name in authors:
        # Position is decided by comparing name values, so a name that
        # appears at both ends of the list falls through to 'Contributor'.
        if len(authors) == 1:
            order = 'Sole'
        elif name == authors[0] and name != authors[-1]:
            order = 'First'
        elif name == authors[-1] and name != authors[0]:
            order = 'Last'
        else:
            order = 'Contributor'
        nameorder, _no_created = NameOrder.objects.get_or_create(order=order)

        publication, _pub_created = Publication.objects.get_or_create(
            author_name=name, name_order=nameorder, document=document)
        publications.append(publication)

        # step 2: handle significant nameorder for CINAHL records
        if nameorder_significant and nameorder and nameorder.order in ('Sole', 'First'):
            publication.affiliation = record.affiliations
            institution = _mskcc_institution_for(record.affiliations)
            if institution is not None:
                publication.institution = institution
            publication.save()

    # step 3: handle Correspondence Address, to catch anything missed earlier
    correspondence = record.correspondence_address
    if correspondence:
        for pub in publications:
            if pub.author_name not in correspondence:
                continue
            modified = False
            if not pub.affiliation:
                pub.affiliation = correspondence
                modified = True
            if not pub.institution:
                institution = _mskcc_institution_for(correspondence)
                if institution is not None:
                    pub.institution = institution
                    modified = True
            if modified:
                pub.save()

    # step 4: per-author affiliations, then hand each publication to
    # create_author (presumably links the employee -- confirm there).
    # The split is loop-invariant, so it is done once up front.
    affiliations = record.affiliations.split(';') if record.affiliations else []
    one_affiliation_per_author = len(affiliations) == len(authors)
    for pub in publications:
        if affiliations:
            # handle PSYCINFO affiliations
            for affiliation in affiliations:
                if pub.author_name in affiliation:
                    pub.affiliation = affiliation
                    institution = _mskcc_institution_for(affiliation)
                    if institution is not None:
                        pub.institution = institution
                    pub.save()
            # handle SCOPUS affiliations
            if one_affiliation_per_author:
                name_index = _author_index(authors, pub.author_name)
                # Compare against None: index 0 (the first author) is a
                # valid position and must not be skipped.
                if name_index is not None:
                    pub.affiliation = affiliations[name_index]
                    institution = _mskcc_institution_for(pub.affiliation)
                    if institution is not None:
                        pub.institution = institution
                    pub.save()
        create_author(pub, pub.author_name)

コード例 #8

0

ファイルを表示

ファイル: loader.py プロジェクト: gapson/synapse-bibliography

def _find_existing_document(record):
    """Return the single stored Document matching ``record``, or None.

    Matching is attempted in order: exact DOI, then first author plus
    first page, then first author plus de-punctuated title.  A lookup
    only counts when it yields exactly one Document.
    """
    if record.doi:
        matches = Document.objects.filter(doi__exact=record.doi)
        if len(matches) == 1:
            return matches[0]
    if not record.authors:
        return None

    # Both remaining comparison points use the de-punctuated first author.
    first_author = get_first_author(record)
    if record.pages:
        # here's the first de-punctuated comparison point
        first_page = record.pages.split('-')[0].strip()
        if first_page:
            matches = Document.objects.filter(page_range__isnull=False).filter(
                stripped_author_names__istartswith=first_author,
                page_range__istartswith=first_page)
            if len(matches) == 1:
                return matches[0]
    if record.title:
        # here's the second de-punctuated comparison point
        stripped_title = remove_punctuation(record.title)
        matches = Document.objects.filter(
            stripped_author_names__istartswith=first_author,
            stripped_title__iexact=stripped_title)
        if len(matches) == 1:
            return matches[0]
    return None


def create_document(record, source):
    """Find or create the Document for ``record`` and fill its empty fields.

    Args:
        record: source record whose attributes (doi, title, authors,
            pages, ...) are matched against and copied onto the Document.
        source: value assigned to ``document.source`` when the document
            has none and ``record.source`` is set.

    Returns:
        The matched or newly created Document.  It is saved again only
        when at least one empty field was filled in.
    """
    document = _find_existing_document(record)
    if not document:
        document = Document.objects.create(title=record.title,
                                           author_names=record.authors,
                                           doi=record.doi)

    # (record attribute, document attribute, converter or None).  A rule
    # fires when the record value is truthy and the document field is
    # empty; existing document values are never overwritten.
    fill_rules = (
        ('doi', 'doi', None),
        ('title', 'title', None),
        ('authors', 'author_names', None),
        # here's where we set the de-punctuated author
        ('authors', 'stripped_author_names', remove_punctuation),
        # NOTE(review): the test reads record.source but the ``source``
        # argument is what gets stored -- confirm this is intended.
        ('source', 'source', lambda _value: source),
        ('abstract', 'abstract', None),
        ('volume', 'volume', None),
        ('issue', 'issue', None),
        ('pages', 'page_range', None),
        ('language', 'language', None),
        ('publish_year', 'publish_year', None),
        ('publish_date', 'publish_date', None),
        ('document_type', 'document_type', map_document_type),
        ('document_subtype', 'document_subtype', None),
        ('dmt', 'dmt',
         lambda dmt_id: DiseaseManagementTeam.objects.get(id=dmt_id)),
        ('affiliations', 'affiliations', None),
    )
    modified = False
    for record_attr, document_attr, convert in fill_rules:
        value = getattr(record, record_attr)
        if value and not getattr(document, document_attr):
            setattr(document, document_attr,
                    convert(value) if convert else value)
            modified = True

    # Derived from the document's own title, which may have just been set.
    if document.title and not document.stripped_title:
        document.stripped_title = remove_punctuation(document.title)
        modified = True

    if modified:
        document.save()
    return document