Example #1
def test():
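    # Query the Summon API with a sample query string, convert the JSON
    # response to a MetaJSON list, wrap it in a collection and print it.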
    base_dir = os.path.join(os.getcwd(), "data")
    print "base_dir: {}".format(base_dir)

    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    query_string = "s.q=Organized+Crime+and+States"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"
    summon_response = summon.summon_query(query_string)
    metajson_list = crosswalks_service.convert_json(
        summon_response, constants.FORMAT_SUMMONJSON, "summon", False)
    collection = metajson_service.create_collection("summon_test",
                                                    "Summon Test",
                                                    metajson_list)
    print jsonbson.dumps_json(collection, True)
Example #2
def test_search():
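    # Build a SearchQuery record with filters (class, dates, languages, types),
    # result options and two title search terms, then run it against the
    # repository and print both the query and the result.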
    search_query = {"filter_class": "Document"}
    search_query["filter_date_begin"] = "2010"
    search_query["filter_date_end"] = "2013"
    search_query["filter_languages"] = ["en", "fr"]
    search_query["filter_types"] = ["Book", "BookPart"]
    search_query["rec_class"] = "SearchQuery"
    search_query["rec_metajson"] = 1
    search_query["result_batch_size"] = 100
    search_query["result_bibliographic_styles"] = ["mla"]
    search_query["result_offset"] = 0
    search_query["result_sorts"] = [{"field": "rec_type", "order": "asc"}]
    search_query["search_terms"] = [{
        "index": "title",
        "operator": "and",
        "value": "Cheyenne"
    }, {
        "index": "title",
        "operator": "or",
        "value": "technique"
    }]

    print "search_query:"
    print jsonbson.dumps_json(search_query, True)

    search_result = repository_service.search(None, search_query)

    print "search_result:"
    print jsonbson.dumps_bson(search_result, True)
Example #3
def retrieve_google_types(app):
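    # Read the document type definitions from a Google Spreadsheet: one
    # worksheet per type bundle, each converted to JSON and written to
    # biblib/conf/types/<type_id>.json.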
    spreadsheet = GSpreadsheet(key=config["google"]["conf_key"],
                               email=config["google"]["email"],
                               password=config["google"]["password"])
    keys = {}
    type_worksheets = get_google_types_worksheets(spreadsheet)
    for ws_id, ws_name in type_worksheets:
        logging.debug('worksheet code: {}, name: {}'.format(ws_id, ws_name))
        worksheet = GSpreadsheet(worksheet=ws_id,
                                 key=config["google"]["conf_key"],
                                 email=config["google"]["email"],
                                 password=config["google"]["password"])
        type_bundle = google_worksheet_to_type(ws_name, worksheet, app, keys)
        type_bundle_id = type_bundle["type_id"]
        type_bundle_dump = jsonbson.dumps_json(type_bundle)
        type_bundle_path = os.path.abspath(
            os.path.join(os.getcwd(), 'biblib', 'conf', 'types',
                         type_bundle_id + ".json"))
        logging.debug(type_bundle_dump)
        with open(type_bundle_path, "w") as type_bundle_file:
            type_bundle_file.write(type_bundle_dump)
    logging.debug(jsonbson.dumps_json(keys))
Example #4
def document(item):
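    # POST the item to the sven "documents" endpoint; a "tags" list is
    # serialized to a JSON string before the form is URL-encoded.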
    #logging.debug(DOCUMENTS)
    url = config["endpoint"] + DOCUMENTS
    #logging.debug(url)
    #logging.debug(item)
    if "tags" in item:
        tags = jsonbson.dumps_json(item["tags"])
        del item["tags"]
        logging.debug(tags)
        item["tags"] = tags
    params_encoded = urllib.urlencode(item)
    logging.debug(params_encoded)
    try:
        request = urllib2.Request(url, params_encoded)
        response = urllib2.urlopen(request)
        result = response.read()
        #logging.debug(result)
        return jsonbson.load_json_str(result)
    except Exception:
        logging.error("*** Error uploading item to sven : {}".format(item["title"]))
        return {"status": "error"}
Example #5
def csv_dict_reader_to_metasjon(csv_row, input_format, source, rec_id_prefix):
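    # Map one CSV row to a MetaJSON Document. Two layouts are supported:
    # the SitPol web-entity export (FORMAT_CSV_SITPOL) and a MetaJSON-style
    # survey export producing "DatasetQuali" records (FORMAT_CSV_METAJSON).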
    document = Document()

    if source:
        document["rec_source"] = source

    if input_format == constants.FORMAT_CSV_SITPOL:
        #logging.debug("csv_dict_reader_to_metasjon type(csv_row): {}".format(type(csv_row)))
        #print csv_row
        document["title"] = csv_row["title"]
        classifications_sitpol = [x.strip() for x in csv_row["classifications_sitpol"].split(";") if x.strip()]
        if classifications_sitpol:
            document["classifications_sitpol"] = classifications_sitpol
        classifications_ddc = [x.strip() for x in csv_row["classifications_ddc"].split(";") if x.strip()]
        if classifications_ddc:
            document["classifications_ddc"] = classifications_ddc
        formatted_names = [x.strip() for x in csv_row["creators@role=pbl"].split(";") if x.strip()]
        if formatted_names:
            #logging.debug("formatted_names: {}".format(formatted_names))
            creators = []
            for formatted_name in formatted_names:
                if formatted_name:
                    creator = creator_service.formatted_name_to_creator(formatted_name, None, "pbl")
                    if creator:
                        creators.append(creator)
            if creators:
                document["creators"] = creators
        document["date_last_accessed"] = datetime.now().isoformat()
        document["descriptions"] = [{"language":"fr", "value":csv_row["descriptions@lang=fr"]}]
        keywords_fr = [x.strip() for x in csv_row["keywords@lang=fr"].split(";") if x.strip()]
        keywords_en = [x.strip() for x in csv_row["keywords@lang=en"].split(";") if x.strip()]
        keywords = {}
        if keywords_fr:
            keywords["fr"] = keywords_fr
        if keywords_en:
            keywords["en"] = keywords_en
        if keywords:
            document["keywords"] = keywords
        document["languages"] = [x.strip() for x in csv_row["languages"].split(";") if x.strip()]
        note = csv_row["notes@lang=fr"]
        if note:
            document["notes"] = note
        document["publication_countries"] = [x.strip() for x in csv_row["publication_countries"].split(";") if x.strip()]
        if "rec_created_user" in csv_row:
            document["rec_created_user"] = csv_row["rec_created_user"]
        document["rec_type_cerimes"] = csv_row["rec_type_cerimes"]
        specific_agents = [x.strip() for x in csv_row["specific_agents"].split(";") if x.strip()]
        if specific_agents:
            document["specific_agents"] = specific_agents
        document["specific_actor_type"] = csv_row["specific_actor_type"]
        document["target_audiences_cerimes"] = csv_row["target_audiences_cerimes"]
        document["url"] = csv_row["url"]
        document["rec_type"] = constants.DOC_TYPE_WEBENTITY
        document["webentity_type"] = csv_row["webentity_type"]
    elif input_format == constants.FORMAT_CSV_METAJSON:
        document["rec_type"] = "DatasetQuali"
        creators = []
        if "Laboratoire d'inventaire" in csv_row:
            creators.append(creator_service.formatted_name_to_creator(csv_row["Laboratoire d'inventaire"], constants.REC_CLASS_ORGUNIT, "dpt"))
        document["title"] = csv_row["Titre de l'enquete"]
        if "Sujet(s) de l'enquete" in csv_row:
            document["keywords"] = [x.strip() for x in csv_row["Sujet(s) de l'enquete"].split("\n") if x.strip()]

        if "Nom Auteur 1" in csv_row:
            name_familly = csv_row["Nom Auteur 1"]
            name_given = affiliation = ""
            if "Prenom Auteur 1" in csv_row:
                name_given = csv_row["Prenom Auteur 1"]
            if "Affiliation Auteur 1" in csv_row:
                affiliation = csv_row["Affiliation Auteur 1"]


        document["creators"] = creators
    else:
        logging.error("Unknown input_format: {}".format(input_format))

    logging.info(jsonbson.dumps_json(document, True))
    return document
Example #6
def dump_result(result):
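    # Print the result serialized as JSON.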
    print jsonbson.dumps_json(result, True)