Esempio n. 1
0
def generate_csv(llrs, filename_output):
    """Write the given resources to ``filename_output`` as a CSV file.

    The file is created (or overwritten) with the ``utf-8-sig`` encoding,
    i.e. UTF-8 preceded by a BOM. The first thing written is the header
    returned by ``LandLibraryResource.get_csv_header_line()``; after that
    each resource contributes whatever its ``as_csv_line()`` returns.

    :param llrs: iterable of objects exposing ``as_csv_line()``.
    :param filename_output: path of the CSV file to write.
    """
    with io.open(filename_output, 'w', encoding='utf-8-sig') as csv_file:
        csv_file.write(LandLibraryResource.get_csv_header_line())
        for llr in llrs:
            csv_file.write(llr.as_csv_line())
        # No explicit close(): leaving the ``with`` block closes the file,
        # also when one of the writes raises.
Esempio n. 2
0
def generate_csv(llrs, filename):
    """Write the given resources to ``filename`` as a UTF-8 CSV file.

    The header returned by ``LandLibraryResource.get_csv_header_line()`` is
    written first, followed by the ``as_csv_line()`` output of every
    resource. No BOM is written.

    :param llrs: iterable of objects exposing ``as_csv_line()``.
    :param filename: path of the CSV file to create or overwrite.
    """
    with open(filename, 'w', encoding="utf-8") as csv_file:
        # TODO: change headers
        csv_file.write(LandLibraryResource.get_csv_header_line())
        for llr in llrs:
            csv_file.write(llr.as_csv_line())
        # No explicit close(): leaving the ``with`` block closes the file.
Esempio n. 3
0
def create_llr_from_lareferencia_record(lareferencia_record):
    """Build a LandLibraryResource from a single LA Referencia record.

    :param lareferencia_record: mapping describing the record. The keys
        "id", "title", "primaryAuthors", "formats", "publicationDates",
        "subjects" and "rawData" are read unconditionally; "subtitle",
        "summary", "languages", "secondaryAuthors" and "corporateAuthors"
        are used only when present. The record itself is not modified.
    :return: the populated LandLibraryResource, or None as soon as one of
        the rights statements is rejected by ``llrutils.checkOpenAccess``.
    """
    llr = LandLibraryResource()

    # ID. Only one.
    internal_id = lareferencia_record["id"]
    llr.set_id(u"LaReferencia:" + internal_id)

    # title. One
    title = lareferencia_record["title"]
    llr.set_title(title)

    # subtitle. Zero or One
    if "subtitle" in lareferencia_record:
        llr.set_subtitle(lareferencia_record["subtitle"])

    # description. Zero or One (only the first summary is used)
    if "summary" in lareferencia_record:
        description = lareferencia_record["summary"][0]
        if description:
            llr.set_description(description)

    # Language. Zero, one or more
    langs_cleared = set()
    if "languages" in lareferencia_record:
        for lang in lareferencia_record["languages"]:
            langs_cleared.add(llrutils.getISO639_1code_from_ISO639_3code(lang))
        langs_cleared = set(filter(None, langs_cleared))

    if not langs_cleared:
        # No usable language in the metadata: guess it from the title and
        # accept the guess only when it is Spanish, Portuguese or English.
        try:
            potential_lang = detect(title.lower())
            if potential_lang in ["es", "pt", "en"]:
                langs_cleared.add(potential_lang)
        except LangDetectException:
            pass  # detection is best effort; leave the language set empty
    llr.set_languages(langs_cleared)

    # author. One or more
    # Work on a copy: extending the record's own list in place would leak
    # the secondary authors into the caller's "primaryAuthors".
    authors = list(lareferencia_record["primaryAuthors"])
    if "secondaryAuthors" in lareferencia_record:
        authors += lareferencia_record["secondaryAuthors"]
    llr.set_authors(authors)

    # corporate_authors. Could be more than one
    if "corporateAuthors" in lareferencia_record:
        llr.set_corporate_authors(lareferencia_record["corporateAuthors"])

    raw_data = lareferencia_record["rawData"]

    # publishers. Zero, one or more
    if "dc.publisher.none.fl_str_mv" in raw_data:
        llr.set_publishers(
            filter(
                None, {
                    utils.getPublisher(pub)
                    for pub in raw_data["dc.publisher.none.fl_str_mv"]
                }))

    # type. One from the first format, plus any recognised dc.type labels
    types = set()
    formats = lareferencia_record["formats"]
    types.add(utils.getLLR_type(formats[0]))
    if "dc.type.none.fl_str_mv" in raw_data:
        peer_reviewed_labels = (
            "Articulo evaluado por dos pares",
            'artículos evaluados por pares',
            'Artículo evaluado por pares ciegos y producto de investigación',
            'Artículo evaluado por pares',
            "Art?culo revisado por pares",
            'Artículo revisado por pares',
        )
        for f in raw_data["dc.type.none.fl_str_mv"]:
            if f == "Artículos de congreso":
                types.add("Conference Papers & Reports")
            if f in peer_reviewed_labels:
                types.add("Peer-reviewed publication")
    llr.set_types(list(types))

    # number of pages. Only one
    # If there is a last page, there is an initial page
    if "dc.description.lastpage.pt.fl_txt_mv" in raw_data:
        lastpage = raw_data["dc.description.lastpage.pt.fl_txt_mv"][0]
        initialpage = raw_data["dc.description.initialpage.pt.fl_txt_mv"][0]
        number_pages = int(lastpage) - int(initialpage)
        if number_pages:
            llr.set_number_pages(number_pages)

    # date. Start from the first publication date; a raw dc.date value that
    # contains it replaces it (cut at "T"). The last match wins.
    publication_date = lareferencia_record["publicationDates"][0]
    best_date = publication_date
    if "dc.date.none.fl_str_mv" in raw_data:
        for potential_date in raw_data["dc.date.none.fl_str_mv"]:
            if publication_date in potential_date:
                best_date = utils.clean_date(potential_date.split("T")[0])
    llr.set_date(utils.clean_date(best_date))

    # original url. Only one
    lareferencia_url = "http://www.lareferencia.info/vufind/Record/" + internal_id
    llr.set_original_url(lareferencia_url)

    # resource url. Only one. localhost URLs are never used.
    resource_url = None
    if "bitstream.url.fl_str_mv" in raw_data:
        usable_urls = [
            url for url in raw_data["bitstream.url.fl_str_mv"]
            if "://localhost" not in url
        ]
        # Prefer a PDF; when several are listed the last one wins.
        for url in usable_urls:
            if url.endswith((".pdf", ".PDF")):
                resource_url = url
        # Otherwise fall back to the first non-localhost URL (arbitrary).
        # An empty list, or one holding only localhost URLs, leaves None.
        if not resource_url and usable_urls:
            resource_url = usable_urls[0]
    elif "url" in raw_data:
        resource_url = raw_data["url"][0]

    llr.set_resource_url(resource_url)

    # License
    # Wording from
    # https://wiki.surfnet.nl/display/standards/info-eu-repo/#info-eu-repo-AccessRights
    open_access_details = "info:eu-repo/semantics/openAccess : Open Access, this refers to access without restrictions, and without financial incentives. Access to the resource is gained directly, without any obstacles."
    license_llr = None
    copyright_details = None
    if "dc.rights.none.fl_str_mv" in raw_data:
        for potential_license in raw_data["dc.rights.none.fl_str_mv"]:
            if not llrutils.checkOpenAccess(potential_license):
                return None  # STOP. Return None
            if "info:eu-repo/semantics/openAccess" in potential_license:
                copyright_details = open_access_details
    # The two remaining fields are handled alike: an explicit "Copyright"
    # statement always overrides, the generic open-access text only fills
    # in when nothing has been set yet.
    if "rights_invalid_str_mv" in raw_data:
        for potential_license in raw_data["rights_invalid_str_mv"]:
            if not llrutils.checkOpenAccess(potential_license):
                return None  # STOP. Return None
            if "Copyright" in potential_license:
                copyright_details = potential_license
            if "creativecommons.org" in potential_license:
                license_llr = llrutils.getCCLicenseAcronym(potential_license)
            if "info:eu-repo/semantics/openAccess" in potential_license and not copyright_details:
                copyright_details = open_access_details
    if "dc.rights.driver.fl_str_mv" in raw_data:
        for potential_license in raw_data["dc.rights.driver.fl_str_mv"]:
            if not llrutils.checkOpenAccess(potential_license):
                return None  # STOP. Return None
            if "Copyright" in potential_license:
                copyright_details = potential_license
            if "creativecommons.org" in potential_license:
                license_llr = llrutils.getCCLicenseAcronym(potential_license)
            if "info:eu-repo/semantics/openAccess" in potential_license and not copyright_details:
                copyright_details = open_access_details
    llr.set_license(license_llr)
    llr.set_copyright_details(copyright_details)

    # data provider
    llr.set_data_provider(u"LA Referencia")

    # keywords: look every subject up as a Spanish, English and Portuguese
    # label and keep whatever non-empty labels come back.
    potential_subjects = set()
    for subject in lareferencia_record["subjects"]:
        potential_subjects.add(
            LANDVOC.get_EnglishPrefLabel(subject[0], lang="es"))
        potential_subjects.add(
            LANDVOC.get_EnglishPrefLabel(subject[0], lang="en"))
        potential_subjects.add(
            LANDVOC.get_EnglishPrefLabel(subject[0], lang="pt"))

    concepts = [unicode(s, "utf-8") for s in filter(None, potential_subjects)]
    themes = LANDVOC.get_fixed_themes(concepts)
    oacs = LANDVOC.get_fixed_oacs(concepts)

    llr.set_concepts(concepts)
    llr.set_themes(themes)
    llr.set_overarching_categories(oacs)

    # geographical focus: places extracted from the title, the description
    # and every subject, with both extractors.
    countries_focus = set()
    countries_focus |= set(utils.getPlaceET_fromText_NLTK(llr.title)) | set(
        utils.getPlaceET_fromText_GeoText(llr.title))
    countries_focus |= set(utils.getPlaceET_fromText_NLTK(
        llr.description)) | set(
            utils.getPlaceET_fromText_GeoText(llr.description))
    for subject in lareferencia_record["subjects"]:
        countries_focus |= set(utils.getPlaceET_fromText_NLTK(
            subject[0])) | set(utils.getPlaceET_fromText_GeoText(subject[0]))
    llr.set_geographical_focus(countries_focus, set())

    return llr
Esempio n. 4
0
def create_llr_from_RIS(ris_entry):
    """Turn one parsed RIS entry into a LandLibraryResource.

    :param ris_entry: mapping with the keys "type_of_reference", "title",
        "abstract", "authors", "doi", "year", "number", "volume" and
        "keywords".
    :return: the populated LandLibraryResource.
    """
    record = LandLibraryResource()

    # type. Only "EJOU" entries get one.
    if ris_entry["type_of_reference"] == "EJOU":
        record.set_types([u"Peer-reviewed publication"])

    # title. Only one
    title = ris_entry["title"]
    record.set_title(title)

    # description
    record.set_description(ris_entry["abstract"])

    # author. One or more
    record.set_authors(ris_entry["authors"])

    # publishers
    record.set_publishers([u"Land Journal"])

    # ID. Only one.
    doi = ris_entry["doi"]
    record.set_id(doi)

    # date. Before 2019 issues 1-4 stand for the months 03/06/09/12 (any
    # other issue falls back to "2"); from 2019 on the issue number itself
    # is used as the month.
    year = ris_entry["year"]  # format (YYYY)
    issue = ris_entry["number"]
    if int(year) < 2019:
        month_by_issue = {
            "1": "03",
            "2": "06",
            "3": "09",
            "4": "12",
        }
        month = month_by_issue.get(issue, "2")
    else:
        month = issue
    record.set_date(llrutils.clean_date(year + "-" + month))

    # original url and resource url. The last four characters of the DOI,
    # read as an integer, form the final path segment.
    volume = ris_entry["volume"]
    first_page = str(int(doi[-4:]))
    article_url = ("http://www.mdpi.com/2073-445X/" +
                   "/".join((volume, issue, first_page)) + "/")
    pdf_url = article_url + "pdf"
    record.set_original_url(article_url)
    record.set_resource_url(pdf_url)

    # License
    record.set_license(u"Creative Commons Attribution")

    # Copyright details
    record.set_copyright_details(u"".join([
        u"© ", year,
        " by the authors; licensee MDPI, Basel, Switzerland. This article is an open access article."
    ]))

    # data provider
    record.set_data_provider(u"Land Journal")

    # image: cover of the journal issue
    record.set_image("".join([
        "https://www.mdpi.com/data/covers/land/cover-land-v", volume, "-i",
        issue, ".png"
    ]))

    # keywords
    keywords = ris_entry["keywords"]
    concepts_list.extend(keywords)
    record.set_potential_list(keywords)

    # geographical focus. First try to read the keywords as ISO 3166-1 and
    # UN M49 places.
    iso_codes = [llrutils.getISO3166_1code(kw) for kw in keywords]
    places = set(llrutils.flatten3(filter(None, iso_codes)))
    m49_codes = [llrutils.getUNM49code(kw) for kw in keywords]
    places.update(llrutils.flatten3(filter(None, m49_codes)))

    # Nothing found in the keywords: extract places from the title and then
    # from the description.
    if not places:
        for read_text in (record.get_title, record.get_description):
            places.update(llrutils.getPlaceET_fromText_NLTK(read_text()))
            places.update(llrutils.getPlaceET_fromText_GeoText(read_text()))

    places = set(filter(None, llrutils.flatten3(places)))

    # Still nothing: fall back to the code "001".
    if not places:
        places.add("001")

    record.set_geographical_focus(places)

    # concepts: direct matches, English synonyms, concepts parsed from the
    # title (except "land") and the Land Journal specific mapping.
    concepts = LANDVOC.get_concepts_direct(keywords)
    concepts.extend(LANDVOC.get_concepts_synonymsEN(keywords))
    journal_concepts = set(
        LANDVOC_LANDJOURNAL.get_concepts_landjournal_related(keywords))
    concepts.extend(
        LANDVOC.get_EnglishPrefLabel(parsed, lang="en")
        for parsed in LANDVOC.parse_get_concepts(title)
        if parsed != "land")
    concepts.extend(journal_concepts)

    concepts = set(concepts) if concepts else {u"land", u"research"}

    record.set_concepts(concepts)
    record.set_themes(LANDVOC.get_fixed_themes(concepts))

    # Language
    record.set_languages([u"en"])

    # Add journal volume and issue as subtitle
    record.set_subtitle("".join(("Volume ", volume, " Issue ", issue)))

    return record
Esempio n. 5
0
def create_llrs(resultDict):
    """Convert every entry of ``resultDict`` into a LandLibraryResource.

    :param resultDict: dict whose values are search results; each value is
        read through ``get_value_metatag`` / ``get_values_metatag`` and
        directly through ``value['R']['mts']``, ``value['R']['U']`` and
        ``value['R']['LANG']``. The keys are not used.
    :return: list with one LandLibraryResource per entry.
    """
    resources = []
    for _key, record in resultDict.iteritems():
        llr = LandLibraryResource()

        # title. Only one
        llr.set_title(get_value_metatag(record, 'Title'))

        # subtitle.
        subtitle = get_value_metatag(record, 'subtitle')
        if subtitle:
            llr.set_subtitle(subtitle)

        # description: the abstract can be split over several metatags, so
        # all of them are concatenated.
        description = "".join(
            tag['MT']['V'] for tag in record['R']['mts']
            if tag['MT']['N'] == 'abstract')
        llr.set_description(description)

        # authors, corporate authors and publishers all derive from the
        # same five metatags.
        author_candidates = [
            get_value_metatag(record, tag_name)
            for tag_name in ('author_en', 'Author', 'personalAuthor_en',
                             'corporateAuthor_en', 'department_en')
        ]
        llr.set_authors(utils.generate_authors(author_candidates))
        organizations = utils.generate_organizations(author_candidates)
        llr.set_corporate_authors(organizations)
        llr.set_publishers(organizations)

        # type.
        doc_type = get_value_metatag(record, 'docType')
        llr.set_types(utils.get_types(doc_type))

        # Meetings without an abstract get a description assembled from
        # the meeting metadata.
        if not description and doc_type == "Meeting":
            meeting_name = get_value_metatag(record, 'meeting_en')
            meeting_symbol = get_value_metatag(record, 'meetingDocSymbol')
            meeting_session = get_value_metatag(record, 'session')
            labelled_parts = (
                ("Meeting Name: ", meeting_name),
                ("\nMeeting symbol/code: ", meeting_symbol),
                ("\nSession: ", meeting_session),
            )
            for label, text in labelled_parts:
                if text:
                    description = description + label + text
            llr.set_description(description)

        # resource_url. Only one
        llr.set_resource_url(record['R']['U'])

        # ID. Only one
        llr.set_id("FAODOCREP:" + get_value_metatag(record, 'uuid'))

        # original url. Only one
        llr.set_original_url(get_value_metatag(record, 'cardURL'))

        # metadata language. Only one
        metadata_lang = record['R']['LANG']
        llr.set_metadata_language(metadata_lang)

        # language of the resource: the default language when it has two
        # characters, the metadata language otherwise; in both cases only
        # two-character values are kept.
        default_lang = get_value_metatag(record, "defaultLanguage")
        chosen_lang = default_lang if len(default_lang) == 2 else metadata_lang
        llr.set_languages(
            [code for code in (chosen_lang, ) if len(code) == 2])

        # number of pages. Only one
        pages = get_value_metatag(record, 'pages')
        if pages:
            pages = pages.replace(" p.", "")
        llr.set_number_pages(pages)

        # License
        llr.set_license("All rights reserved")

        # Copyright details: Spanish for "S", French for "F", English for
        # everything else.
        record_language = llr.metadata_language
        if record_language == "S":
            copyright_notice = u"© FAO. Para garantizar la amplia difusión de su información, la FAO se ha comprometido a poner libremente a disposición de los interesados este contenido y alienta el uso, la reproducción y la difusión de los textos, productos multimedia y datos presentados. Salvo que se indique lo contrario, el contenido se puede copiar, imprimir y descargar con fines de estudio privado, investigación y docencia, y para uso en productos o servicios no comerciales, siempre que se reconozca de forma adecuada a la FAO como fuente y titular de los derechos de autor  y que no se indique o que ello implique en modo alguno que la FAO aprueba los puntos de vista, productos o servicios de los usuarios."
        elif record_language == "F":
            copyright_notice = u"© FAO. Afin d’assurer une large diffusion de ses informations, la FAO s’attache à donner libre accès à ce contenu et encourage l’utilisation, la reproduction et la diffusion des données, des informations textuelles et des supports multimédia présentés. Sauf indication contraire, le contenu peut être reproduit, imprimé et téléchargé aux fins d’étude privée, de recherches ou d’enseignement ainsi que pour utilisation dans des produits ou services non commerciaux, sous réserve que la FAO soit correctement mentionnée comme source et comme titulaire du droit d’auteur et à condition qu’il ne soit ni déclaré ni sous-entendu en aucune manière que la FAO approuverait les opinions, produits ou services des utilisateurs."
        else:
            copyright_notice = u"© FAO. FAO is committed to making its content freely available and encourages the use, reproduction and dissemination of the text, multimedia and data presented. Except where otherwise indicated, content may be copied, printed and downloaded for private study, research and teaching purposes, and for use in non-commercial products or services, provided that appropriate acknowledgement of FAO as the source and copyright holder is given and that FAO's endorsement of users' views, products or services is not stated or implied in any way."
        llr.set_copyright_details(copyright_notice)

        # data provider
        llr.set_data_provider(
            u"Food and Agriculture Organization of the United Nations (FAO)")

        # date. The raw value is read as MM/YYYY once cleaned up.
        raw_date = str(get_value_metatag(record, 'publicationDate'))
        if raw_date.startswith("Dec 2012"):
            raw_date = "12/2012"
        # remove all whitespace, then collapse a duplicated '/'
        raw_date = "".join(raw_date.split()).replace("//", "/")
        # drop square brackets
        raw_date = raw_date.translate(None, '[]')
        if len(raw_date) == 4:  # just the year
            raw_date = "12/" + raw_date

        # change order to be YYYY-MM
        date_parts = raw_date.split("/")
        llr.set_date(llrutils.clean_date(date_parts[1] + "-" + date_parts[0]))

        # geographical focus. Countries come from 'country_en' plus the
        # comma separated 'gsaentity_Country'; regions from 'region_en'.
        entity_countries = get_value_metatag(record, 'gsaentity_Country')
        extra_countries = entity_countries.split(',') if entity_countries else []
        country_names = set(
            get_values_metatag(record, 'country_en') + extra_countries)
        country_codes = {
            utils.getISO3166_1_alpha3_code(name)
            for name in country_names
        }
        region_codes = {
            utils.getUNM49code(unicode(region).title())
            for region in get_values_metatag(record, 'region_en')
        }
        llr.set_geographical_focus(country_codes, region_codes)

        # landvoc concepts
        agrovoc_terms = get_values_metatag(record, 'agrovoc_en')
        direct = LANDVOC.get_concepts_direct(agrovoc_terms)
        related = LandVocFAODOCREP.get_concepts_faolex_related(agrovoc_terms)
        enhanced = LandVocFAODOCREP.get_concepts_harvest_enhancement(
            agrovoc_terms)
        concepts = [
            unicode(concept) for concept in set(direct + related + enhanced)
            if concept
        ]
        llr.set_concepts(concepts)

        # Themes
        themes = LandVocFAODOCREP.get_themes_harvest_enhancement(
            agrovoc_terms)
        themes |= LANDVOC.get_fixed_themes(concepts)
        llr.set_themes(themes)

        # Thumbnail
        llr.set_image(get_value_metatag(record, 'thumb200'))

        resources.append(llr)

    print("**********************************")
    return resources
Esempio n. 6
0
def create_llrs(resultDict):
    """Convert FAOLEX search results into LandLibraryResource objects.

    :param resultDict: dict whose values are search results; each value is
        read through ``get_value_metatag`` / ``get_values_metatag`` and
        directly through ``value['R']['mts']``. The keys are not used.
    :return: list with one LandLibraryResource per entry.
    """
    # (title metatag, date metatag, label) for every kind of related legal
    # text, in the order in which they are appended to the description.
    related_text_tags = (
        ('implementedByTitle', 'implementedByDate', "Implemented by: "),
        ('implementTitle', 'implementDate', "Implements: "),
        ('amendedByTitle', 'amendedByDate', "Amended by: "),
        ('amendsTitle', 'amendsDate', "Amends: "),
        ('repealedByTitle', 'repealedByDate', "Repealed by: "),
        ('repealsTitle', 'repealsDate', "Repeals: "),
    )

    llrs = []
    for _key, value in resultDict.iteritems():
        llr = LandLibraryResource()

        # title. Only one
        title = get_value_metatag(value, 'titleOfText')
        llr.set_title(title)

        # subtitle. 1st longTitle, 2nd original title, 3rd None
        subtitle_longtitle = get_value_metatag(value, 'longTitleOfText')
        subtitle_originaltitle = get_value_metatag(value,
                                                   'originalTitleOfText')
        if subtitle_longtitle is not None:
            llr.set_subtitle(subtitle_longtitle)
        else:
            llr.set_subtitle(subtitle_originaltitle)

        # The abstract can be split over several metatags; concatenate them.
        # It is only stored at the end, once the related texts are appended.
        description = "".join([
            x['MT']['V'] for x in value['R']['mts']
            if x['MT']['N'] == 'abstract'
        ])

        # author. Only one
        authors = get_value_metatag(value, 'Author')
        if authors:
            llr.set_authors([authors])

        # type. Only one
        type_original = get_value_metatag(value, 'typeOfTextCode')
        llr.set_types(utils.getLLR_types(type_original))

        # metadata language. Only one
        metadata_language = get_value_metatag(value, 'recordLanguage')
        llr.set_metadata_language(metadata_language)

        # resource_url. Only one
        linksToFullText = get_value_metatag(value, 'linksToFullText')
        resource_url = "http://extwprlegs1.fao.org/docs/pdf/" + linksToFullText
        llr.set_resource_url(resource_url)

        # ID. Only one
        faolexId = get_value_metatag(value, 'faolexId')
        llr.set_id(faolexId)

        # original url. Only one
        original_url = "http://www.fao.org/faolex/results/details/en/c/" + faolexId
        llr.set_original_url(original_url)

        # number of pages. Only one
        page_count = get_value_metatag(value, 'page count')
        llr.set_number_pages(page_count)

        # License
        llr.set_license("All rights reserved")

        # Copyright details: Spanish for "S", French for "F", English for
        # everything else.
        if llr.metadata_language == "S":
            llr.set_copyright_details(
                u"© FAO. Para garantizar la amplia difusión de su información, la FAO se ha comprometido a poner libremente a disposición de los interesados este contenido y alienta el uso, la reproducción y la difusión de los textos, productos multimedia y datos presentados. Salvo que se indique lo contrario, el contenido se puede copiar, imprimir y descargar con fines de estudio privado, investigación y docencia, y para uso en productos o servicios no comerciales, siempre que se reconozca de forma adecuada a la FAO como fuente y titular de los derechos de autor  y que no se indique o que ello implique en modo alguno que la FAO aprueba los puntos de vista, productos o servicios de los usuarios."
            )
        elif llr.metadata_language == "F":
            llr.set_copyright_details(
                u"© FAO. Afin d’assurer une large diffusion de ses informations, la FAO s’attache à donner libre accès à ce contenu et encourage l’utilisation, la reproduction et la diffusion des données, des informations textuelles et des supports multimédia présentés. Sauf indication contraire, le contenu peut être reproduit, imprimé et téléchargé aux fins d’étude privée, de recherches ou d’enseignement ainsi que pour utilisation dans des produits ou services non commerciaux, sous réserve que la FAO soit correctement mentionnée comme source et comme titulaire du droit d’auteur et à condition qu’il ne soit ni déclaré ni sous-entendu en aucune manière que la FAO approuverait les opinions, produits ou services des utilisateurs."
            )
        else:
            llr.set_copyright_details(
                u"© FAO. FAO is committed to making its content freely available and encourages the use, reproduction and dissemination of the text, multimedia and data presented. Except where otherwise indicated, content may be copied, printed and downloaded for private study, research and teaching purposes, and for use in non-commercial products or services, provided that appropriate acknowledgement of FAO as the source and copyright holder is given and that FAO's endorsement of users' views, products or services is not stated or implied in any way."
            )

        # data provider
        llr.set_data_provider(u"FAO Legal Office")

        # date. Date of the text, or of the original text when missing.
        dateOfText = get_value_metatag(value, 'dateOfText')
        dateOfOriginalText = get_value_metatag(value, 'dateOfOriginalText')
        if dateOfText is not None:
            llr.set_date(dateOfText)
        else:
            llr.set_date(dateOfOriginalText)

        # geographical focus. "EUR" is dropped from the country codes;
        # geographical areas are mapped with utils.getUNM49code.
        country_ISO3_list = get_values_metatag(value, 'country_ISO3')
        geographicalArea_en_list = get_values_metatag(value,
                                                      'geographicalArea_en')

        countries_cleaned = set([x for x in country_ISO3_list if x != "EUR"])
        regions_cleaned = set(
            llrutils.flatten(
                filter(None, [
                    utils.getUNM49code(region)
                    for region in geographicalArea_en_list
                ])))

        llr.set_geographical_focus(countries_cleaned, regions_cleaned)

        # landvoc concepts. Only the FAOLEX-related mapping is used.
        potential_concepts = get_values_metatag(value, 'keyword_en')
        related_mapping = LANDVOCFAOLEX.get_concepts_faolex_related(
            potential_concepts)
        concepts = filter(None, set(related_mapping))
        llr.set_concepts(concepts)

        themes = LANDVOC.get_fixed_themes(concepts)
        oacs = LANDVOC.get_fixed_oacs(concepts) | set(
            [u"Land Policy & Legislation"])

        # Overarching categories
        llr.set_overarching_categories(oacs)

        # Themes
        llr.set_themes(themes)

        # related website. Only used to derive the publisher.
        relatedWebSite = get_value_metatag(value, 'relatedWebSite')
        if relatedWebSite:
            publishers = utils.get_publisher(relatedWebSite)
            if publishers:
                llr.set_publishers([publishers])

        # language. Only one: the single document language when there is
        # exactly one, otherwise the language reported by the search
        # engine metatag.
        gsaentity_google_language = get_value_metatag(
            value, 'gsaentity_google_language')
        documentLanguage_en = get_values_metatag(value, 'documentLanguage_en')
        if len(documentLanguage_en) == 1:
            language = documentLanguage_en[0]
        else:
            language = gsaentity_google_language
        language = llrutils.getISO639_1code(language)
        llr.set_languages([language])

        # description = abstract, a newline, then one generated section per
        # kind of related legal text.
        description_parts = [description, "\n"]
        for title_tag, date_tag, label in related_text_tags:
            related_titles = get_values_metatag(value, title_tag)
            related_dates = get_values_metatag(value, date_tag)
            description_parts.append(
                generate_aditional_text(related_titles, related_dates, label,
                                        llr.metadata_language))
        llr.set_description("".join(description_parts))

        llrs.append(llr)

    print("**********************************")
    print("new records without repealed (Set)= %d" % (len(llrs)))
    return llrs
Esempio n. 7
0
def _select_resource_url(potential_urls):
    """Choose the resource URL among the bitstream URLs of a record.

    URLs containing "://localhost" are never returned. Among the remaining
    ones the last URL ending in ".pdf"/".PDF" is preferred; when there is no
    PDF the first remaining URL is used. Returns None when nothing is left.
    """
    candidates = [url for url in potential_urls if "://localhost" not in url]
    if not candidates:
        return None
    pdf_urls = [url for url in candidates if url.endswith((".pdf", ".PDF"))]
    if pdf_urls:
        return pdf_urls[-1]
    return candidates[0]  # arbitrary: take the first usable one


def create_llr_from_lareferencia_record(lareferencia_record):
    """Build a LandLibraryResource from one LA Referencia record.

    Args:
        lareferencia_record: mapping that must provide the keys "id",
            "title", "primaryAuthors", "formats", "publicationDates",
            "subjects" and "rawData"; "subtitle", "summary", "languages",
            "secondaryAuthors" and "corporateAuthors" are optional. The
            record is not modified.

    Returns:
        The populated LandLibraryResource, or None as soon as one of the
        rights statements is rejected by llrutils.checkOpenAccess.

    Raises:
        KeyError / IndexError: when a mandatory key is missing or a
            mandatory list is empty.
        ValueError: when the first/last page values are not integers.
    """
    llr = LandLibraryResource()

    # ID. Only one.
    internal_id = lareferencia_record["id"]
    llr.set_id(u"LaReferencia:" + internal_id)

    # Title. One.
    title = lareferencia_record["title"]
    llr.set_title(title)

    # Subtitle. Zero or one.
    if "subtitle" in lareferencia_record:
        llr.set_subtitle(lareferencia_record["subtitle"])

    # Description. Zero or one (only the first summary is used).
    if "summary" in lareferencia_record:
        description = lareferencia_record["summary"][0]
        if description:
            llr.set_description(description)

    # Language. Zero, one or more.
    langs_cleared = set()
    if "languages" in lareferencia_record:
        for lang in lareferencia_record["languages"]:
            langs_cleared.add(llrutils.getISO639_1code_from_ISO639_3code(lang))
        # Drop the empty results of codes that could not be converted.
        langs_cleared = {code for code in langs_cleared if code}

    if not langs_cleared:
        # No usable declared language: guess it from the title, accepting
        # only Spanish, Portuguese or English.
        try:
            potential_lang = detect(title.lower())
            if potential_lang in ["es", "pt", "en"]:
                langs_cleared.add(potential_lang)
        except LangDetectException:
            pass
    llr.set_languages(langs_cleared)

    # Authors. One or more. Work on a copy so the "primaryAuthors" list of
    # the incoming record is not extended in place.
    authors = list(lareferencia_record["primaryAuthors"])
    if "secondaryAuthors" in lareferencia_record:
        authors.extend(lareferencia_record["secondaryAuthors"])
    llr.set_authors(authors)

    # Corporate authors. Could be more than one.
    if "corporateAuthors" in lareferencia_record:
        llr.set_corporate_authors(lareferencia_record["corporateAuthors"])

    raw_data = lareferencia_record["rawData"]

    # Publishers. Zero, one or more (deduplicated, empty values dropped).
    if "dc.publisher.none.fl_str_mv" in raw_data:
        unique_publishers = {
            utils.getPublisher(pub)
            for pub in raw_data["dc.publisher.none.fl_str_mv"]
        }
        llr.set_publishers([pub for pub in unique_publishers if pub])

    # Type. Derived from the first format, refined with the raw dc.type
    # values when they are present.
    conference_labels = ("Artículos de congreso",)
    peer_reviewed_labels = (
        "Articulo evaluado por dos pares",
        'artículos evaluados por pares',
        'Artículo evaluado por pares ciegos y producto de investigación',
        'Artículo evaluado por pares',
        "Art?culo revisado por pares",
        'Artículo revisado por pares',
    )
    types = set()
    formats = lareferencia_record["formats"]
    types.add(utils.getLLR_type(formats[0]))
    if "dc.type.none.fl_str_mv" in raw_data:
        for raw_type in raw_data["dc.type.none.fl_str_mv"]:
            if raw_type in conference_labels:
                types.add("Conference Papers & Reports")
            if raw_type in peer_reviewed_labels:
                types.add("Peer-reviewed publication")
    llr.set_types(list(types))

    # Number of pages. Only one.
    # If there is a last page, there is an initial page.
    if "dc.description.lastpage.pt.fl_txt_mv" in raw_data:
        lastpage = raw_data["dc.description.lastpage.pt.fl_txt_mv"][0]
        initialpage = raw_data["dc.description.initialpage.pt.fl_txt_mv"][0]
        # NOTE(review): this is last - first, i.e. one less than the
        # inclusive page count, and a single-page item is skipped below.
        # Left unchanged; confirm whether "+ 1" is wanted.
        number_pages = int(lastpage) - int(initialpage)
        if number_pages:
            llr.set_number_pages(number_pages)

    # Date. Start from the first publication date and prefer a raw dc.date
    # value that contains it (the part before "T"); the last match wins.
    publication_date = lareferencia_record["publicationDates"][0]
    best_date = publication_date
    if "dc.date.none.fl_str_mv" in raw_data:
        for potential_date in raw_data["dc.date.none.fl_str_mv"]:
            if publication_date in potential_date:
                best_date = utils.clean_date(potential_date.split("T")[0])
    llr.set_date(utils.clean_date(best_date))

    # Original URL. Only one.
    lareferencia_url = "http://www.lareferencia.info/vufind/Record/" + internal_id
    llr.set_original_url(lareferencia_url)

    # Resource URL. Only one. Localhost links are discarded.
    resource_url = None
    if "bitstream.url.fl_str_mv" in raw_data:
        resource_url = _select_resource_url(raw_data["bitstream.url.fl_str_mv"])
    elif "url" in raw_data:
        resource_url = raw_data["url"][0]
    llr.set_resource_url(resource_url)

    # License and copyright details.
    # Text from https://wiki.surfnet.nl/display/standards/info-eu-repo/#info-eu-repo-AccessRights
    open_access_details = "info:eu-repo/semantics/openAccess : Open Access, this refers to access without restrictions, and without financial incentives. Access to the resource is gained directly, without any obstacles."
    license_llr = None
    copyright_details = None
    if "dc.rights.none.fl_str_mv" in raw_data:
        for potential_license in raw_data["dc.rights.none.fl_str_mv"]:
            if not llrutils.checkOpenAccess(potential_license):
                return None  # STOP. Return None
            if "info:eu-repo/semantics/openAccess" in potential_license:
                copyright_details = open_access_details
    # These two fields are scanned the same way, in this order; an explicit
    # copyright statement takes precedence over the generic open-access text.
    for rights_field in ("rights_invalid_str_mv", "dc.rights.driver.fl_str_mv"):
        if rights_field not in raw_data:
            continue
        for potential_license in raw_data[rights_field]:
            if not llrutils.checkOpenAccess(potential_license):
                return None  # STOP. Return None
            if "Copyright" in potential_license:
                copyright_details = potential_license
            if "creativecommons.org" in potential_license:
                license_llr = llrutils.getCCLicenseAcronym(potential_license)
            if "info:eu-repo/semantics/openAccess" in potential_license and not copyright_details:
                copyright_details = open_access_details
    llr.set_license(license_llr)
    llr.set_copyright_details(copyright_details)

    # Data provider.
    llr.set_data_provider(u"LA Referencia")

    # Image: none set for this provider.

    # Keywords: look every subject up in LANDVOC as a Spanish, English and
    # Portuguese label.
    subjects = lareferencia_record["subjects"]
    potential_subjects = set()
    for subject in subjects:
        for label_lang in ("es", "en", "pt"):
            potential_subjects.add(
                LANDVOC.get_EnglishPrefLabel(subject[0], lang=label_lang))

    concepts = [unicode(s, "utf-8") for s in potential_subjects if s]
    themes = LANDVOC.get_fixed_themes(concepts)
    oacs = LANDVOC.get_fixed_oacs(concepts)

    llr.set_concepts(concepts)
    llr.set_themes(themes)
    llr.set_overarching_categories(oacs)

    # Geographical focus: places found in the title, the description and
    # every subject, using both extractors on each text.
    countries_focus = set()
    texts = [llr.title, llr.description]
    texts.extend(subject[0] for subject in subjects)
    for text in texts:
        countries_focus.update(utils.getPlaceET_fromText_NLTK(text))
        countries_focus.update(utils.getPlaceET_fromText_GeoText(text))
    llr.set_geographical_focus(countries_focus, set())

    return llr
Esempio n. 8
0
def create_llr_from_RIS(ris_entry):
    """Build a LandLibraryResource from one RIS entry of the Land journal.

    Args:
        ris_entry: mapping with the keys "type_of_reference", "title",
            "abstract", "authors", "doi", "year", "number", "volume" and
            "keywords". "title" and "abstract" are decoded from UTF-8.

    Returns:
        The populated LandLibraryResource.

    Raises:
        KeyError: when a key is missing or the issue number is not one of
            "1".."4".
        ValueError: when the last four characters of the DOI are not digits.
    """
    llr_record = LandLibraryResource()

    # Type. Only set for electronic journal entries.
    if ris_entry["type_of_reference"] == "EJOU":
        llr_record.set_type(u"Peer-reviewed publication")

    # Title. Only one.
    title = ris_entry["title"]
    llr_record.set_title(title.decode('utf-8'))

    # Description.
    description = ris_entry["abstract"]
    llr_record.set_description(description.decode('utf-8'))

    # Authors. One or more.
    llr_record.set_authors(ris_entry["authors"])

    # Publishers. Fixed for this source.
    llr_record.set_publishers([u"Land Journal"])

    # ID. Only one: the DOI.
    doi = ris_entry["doi"]
    llr_record.set_id(doi)

    # Date, format YYYY-MM-DD. The issue number selects the last day of
    # March, June, September or December (presumably quarterly issues).
    # June and September have 30 days, so the day cannot be a fixed "31".
    year = ris_entry["year"]  # format (YYYY)
    issue = ris_entry["number"]
    issue_end = {
        "1": "03-31",
        "2": "06-30",
        "3": "09-30",
        "4": "12-31",
    }[issue]
    llr_record.set_date(year + "-" + issue_end)

    # URLs. The last four characters of the DOI, without leading zeros, are
    # the final path component of the article page.
    volume = ris_entry["volume"]
    init_page = str(int(doi[-4:]))
    original_url = "http://www.mdpi.com/2073-445X/" + volume + "/" + issue + "/" + init_page + "/"
    resource_url = original_url + "pdf"

    # Original URL. Only one.
    llr_record.set_original_url(original_url)

    # Resource URL. Only one.
    llr_record.set_resource_url(resource_url)

    # License.
    llr_record.set_license(u"Creative Commons Attribution")

    # Copyright details.
    copyright_details = u"© "+year+" by the authors; licensee MDPI, Basel, Switzerland. This article is an open access article."
    llr_record.set_copyright_details(copyright_details)

    # Data provider.
    llr_record.set_data_provider(u"Land Journal")

    # Image.
    llr_record.set_image("private://feeds/LandJournal-thumbnail.png")

    # Keywords.
    keywords = ris_entry["keywords"]

    # Geographical focus: keywords that resolve to country or region codes.
    country_codes = [utils.getISO3166_1code(k) for k in keywords]
    countries = set(utils.flatten([code for code in country_codes if code]))
    region_codes = [utils.getUNM49code(k) for k in keywords]
    regions = set(utils.flatten([code for code in region_codes if code]))
    llr_record.set_geographical_focus(countries, regions)

    # Concepts, themes and overarching categories from LANDVOC.
    concepts = LANDVOC.get_concepts_direct(keywords)
    themes = LANDVOC.get_fixed_themes(concepts)
    oacs = LANDVOC.get_fixed_oacs(concepts)

    llr_record.set_concepts(concepts)
    llr_record.set_themes(themes)
    llr_record.set_overarching_categories(oacs)

    # Language. Always English for this source.
    llr_record.set_language(u"en")

    return llr_record