def test():
    base_dir = os.path.join(os.getcwd(), "data")
    print "base_dir: {}".format(base_dir)
    # Alternative Summon query strings, kept for manual testing:
    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    query_string = "s.q=Organized+Crime+and+States"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook:
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"
    summon_response = summon.summon_query(query_string)
    metajson_list = crosswalks_service.convert_json(
        summon_response, constants.FORMAT_SUMMONJSON, "summon", False)
    collection = metajson_service.create_collection(
        "summon_test", "Summon Test", metajson_list)
    print jsonbson.dumps_json(collection, True)
def test_search():
    search_query = {"filter_class": "Document"}
    search_query["filter_date_begin"] = "2010"
    search_query["filter_date_end"] = "2013"
    search_query["filter_languages"] = ["en", "fr"]
    search_query["filter_types"] = ["Book", "BookPart"]
    search_query["rec_class"] = "SearchQuery"
    search_query["rec_metajson"] = 1
    search_query["result_batch_size"] = 100
    search_query["result_bibliographic_styles"] = ["mla"]
    search_query["result_offset"] = 0
    search_query["result_sorts"] = [{"field": "rec_type", "order": "asc"}]
    search_query["search_terms"] = [
        {"index": "title", "operator": "and", "value": "Cheyenne"},
        {"index": "title", "operator": "or", "value": "technique"}
    ]
    print "search_query:"
    print jsonbson.dumps_json(search_query, True)
    search_result = repository_service.search(None, search_query)
    print "search_result:"
    print jsonbson.dumps_bson(search_result, True)
def retrieve_google_types(app):
    spreadsheet = GSpreadsheet(key=config["google"]["conf_key"],
                               email=config["google"]["email"],
                               password=config["google"]["password"])
    keys = {}
    type_worksheets = get_google_types_worksheets(spreadsheet)
    for ws_id, ws_name in type_worksheets:
        logging.debug('worksheet code: {}, name: {}'.format(ws_id, ws_name))
        worksheet = GSpreadsheet(worksheet=ws_id,
                                 key=config["google"]["conf_key"],
                                 email=config["google"]["email"],
                                 password=config["google"]["password"])
        type_bundle = google_worksheet_to_type(ws_name, worksheet, app, keys)
        type_bundle_id = type_bundle["type_id"]
        type_bundle_dump = jsonbson.dumps_json(type_bundle)
        type_bundle_path = os.path.abspath(os.path.join(
            os.getcwd(), 'biblib', 'conf', 'types', type_bundle_id + ".json"))
        logging.debug(type_bundle_dump)
        # Write one JSON bundle per worksheet under biblib/conf/types/.
        with open(type_bundle_path, "w") as type_bundle_file:
            type_bundle_file.write(type_bundle_dump)
    logging.debug(jsonbson.dumps_json(keys))
def document(item):
    #logging.debug(DOCUMENTS)
    url = config["endpoint"] + DOCUMENTS
    #logging.debug(url)
    #logging.debug(item)
    if "tags" in item:
        # The endpoint expects tags as a JSON-encoded string, not a list.
        tags = jsonbson.dumps_json(item["tags"])
        del item["tags"]
        logging.debug(tags)
        item["tags"] = tags
    try:
        params_encoded = urllib.urlencode(item)
        logging.debug(params_encoded)
        request = urllib2.Request(url, params_encoded)
        response = urllib2.urlopen(request)
        result = response.read()
        #logging.debug(result)
        return jsonbson.load_json_str(result)
    except:
        logging.error("*** Error uploading item to sven: {}".format(item["title"]))
        return {"status": "error"}
def csv_dict_reader_to_metajson(csv_row, input_format, source, rec_id_prefix):
    document = Document()
    if source:
        document["rec_source"] = source
    if input_format == constants.FORMAT_CSV_SITPOL:
        #logging.debug("csv_dict_reader_to_metajson type(csv_row): {}".format(type(csv_row)))
        #print csv_row
        document["title"] = csv_row["title"]
        classifications_sitpol = [x.strip() for x in csv_row["classifications_sitpol"].split(";") if x.strip()]
        if classifications_sitpol:
            document["classifications_sitpol"] = classifications_sitpol
        classifications_ddc = [x.strip() for x in csv_row["classifications_ddc"].split(";") if x.strip()]
        if classifications_ddc:
            document["classifications_ddc"] = classifications_ddc
        formatted_names = [x.strip() for x in csv_row["creators@role=pbl"].split(";") if x.strip()]
        if formatted_names:
            #logging.debug("formatted_names: {}".format(formatted_names))
            creators = []
            for formatted_name in formatted_names:
                if formatted_name:
                    creator = creator_service.formatted_name_to_creator(formatted_name, None, "pbl")
                    if creator:
                        creators.append(creator)
            if creators:
                document["creators"] = creators
        document["date_last_accessed"] = datetime.now().isoformat()
        document["descriptions"] = [{"language": "fr", "value": csv_row["descriptions@lang=fr"]}]
        keywords_fr = [x.strip() for x in csv_row["keywords@lang=fr"].split(";") if x.strip()]
        keywords_en = [x.strip() for x in csv_row["keywords@lang=en"].split(";") if x.strip()]
        keywords = {}
        if keywords_fr:
            keywords["fr"] = keywords_fr
        if keywords_en:
            keywords["en"] = keywords_en
        if keywords:
            document["keywords"] = keywords
        document["languages"] = [x.strip() for x in csv_row["languages"].split(";") if x.strip()]
        note = csv_row["notes@lang=fr"]
        if note:
            document["notes"] = note
        document["publication_countries"] = [x.strip() for x in csv_row["publication_countries"].split(";") if x.strip()]
        if "rec_created_user" in csv_row:
            document["rec_created_user"] = csv_row["rec_created_user"]
        document["rec_type_cerimes"] = csv_row["rec_type_cerimes"]
        specific_agents = [x.strip() for x in csv_row["specific_agents"].split(";") if x.strip()]
        if specific_agents:
            document["specific_agents"] = specific_agents
        document["specific_actor_type"] = csv_row["specific_actor_type"]
        document["target_audiences_cerimes"] = csv_row["target_audiences_cerimes"]
        document["url"] = csv_row["url"]
        document["rec_type"] = constants.DOC_TYPE_WEBENTITY
        document["webentity_type"] = csv_row["webentity_type"]
    elif input_format == constants.FORMAT_CSV_METAJSON:
        document["rec_type"] = "DatasetQuali"
        creators = []
        if "Laboratoire d'inventaire" in csv_row:
            creators.append(creator_service.formatted_name_to_creator(
                csv_row["Laboratoire d'inventaire"], constants.REC_CLASS_ORGUNIT, "dpt"))
        document["title"] = csv_row["Titre de l'enquete"]
        if "Sujet(s) de l'enquete" in csv_row:
            document["keywords"] = [x.strip() for x in csv_row["Sujet(s) de l'enquete"].split("\n") if x.strip()]
        if "Nom Auteur 1" in csv_row:
            name_family = csv_row["Nom Auteur 1"]
            name_given = ""
            if "Prenom Auteur 1" in csv_row:
                name_given = csv_row["Prenom Auteur 1"]
            # "Affiliation Auteur 1" was read but never attached to the creator
            # in the original code; it is left aside here. Building the author
            # via formatted_name_to_creator with a None rec_class and the "aut"
            # role is an assumption, mirroring the "pbl" call above.
            formatted_name = ", ".join([x for x in [name_family, name_given] if x])
            creator = creator_service.formatted_name_to_creator(formatted_name, None, "aut")
            if creator:
                creators.append(creator)
        if creators:
            document["creators"] = creators
    else:
        logging.error("Unknown input_format: {}".format(input_format))
    logging.info(jsonbson.dumps_json(document, True))
    return document
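# Minimal usage sketch for the SITPOL branch above: feed csv.DictReader rows
# straight into csv_dict_reader_to_metajson. The file name, the semicolon
# delimiter, and the "sitpol" source tag are illustrative assumptions, not
# part of the original module.
import csv

def convert_sitpol_csv(csv_path):
    with open(csv_path, "rb") as csv_file:
        reader = csv.DictReader(csv_file, delimiter=";")
        return [csv_dict_reader_to_metajson(row, constants.FORMAT_CSV_SITPOL, "sitpol", None)
                for row in reader]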
def dump_result(result):
    print jsonbson.dumps_json(result, True)
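# Ad-hoc driver sketch, assuming the module is run directly; the choice and
# order of tests invoked here are illustrative, not part of the original file.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    test()
    test_search()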