def test():
    """Manual smoke test: run one Summon query and print the result.

    The Summon response is converted with ``crosswalks_service.convert_json``
    (input format ``constants.FORMAT_SUMMONJSON``, source ``"summon"``), the
    converted records are wrapped in a collection with id ``"summon_test"``
    and the collection is printed via ``jsonbson.dumps_json``.
    """
    data_dir = os.path.join(os.getcwd(), "data")
    # A parenthesised single-argument print outputs the same text as the
    # Python 2 print statement.
    print("base_dir: {}".format(data_dir))

    # Alternative sample queries, kept for manual experiments. To try one,
    # assign it to query_string in place of the active value below.
    #
    # Facet filter combinations:
    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook:
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"

    # Query that is actually sent.
    query_string = "s.q=Organized+Crime+and+States"

    response = summon.summon_query(query_string)
    records = crosswalks_service.convert_json(
        response,
        constants.FORMAT_SUMMONJSON,
        "summon",
        False,
    )
    summon_collection = metajson_service.create_collection(
        "summon_test", "Summon Test", records)
    # NOTE(review): the True flag presumably asks for pretty-printed JSON;
    # confirm against jsonbson.dumps_json.
    print(jsonbson.dumps_json(summon_collection, True))
def convert_metajson_list_in_one_file(metajson_list, output_format):
    """Yield one document that bundles every record of ``metajson_list``.

    This is a generator that produces at most one item. Nothing is yielded
    when ``metajson_list`` is falsy. When ``output_format`` is not one of the
    formats handled below, an error is logged and nothing is yielded.

    Handled formats: ``FORMAT_METAJSON`` and ``FORMAT_HTML`` (both produce a
    MetaJSON collection), ``FORMAT_MODS``, ``FORMAT_METS``, ``FORMAT_REPEC``
    and ``FORMAT_CSV_METAJSON``.
    """
    if not metajson_list:
        return

    fmt = output_format
    if fmt == constants.FORMAT_METAJSON or fmt == constants.FORMAT_HTML:
        # Collection is created without an id or a title.
        bundled = metajson_service.create_collection(None, None, metajson_list)
    elif fmt == constants.FORMAT_MODS:
        bundled = mods_crosswalk.metajson_list_to_mods_xmletree(metajson_list)
    elif fmt == constants.FORMAT_METS:
        bundled = mets_crosswalk.metajson_list_to_mets_xmletree(metajson_list)
    elif fmt == constants.FORMAT_REPEC:
        bundled = repec_crosswalk.metajson_list_to_repec(metajson_list)
    elif fmt == constants.FORMAT_CSV_METAJSON:
        bundled = csv_crosswalk.metajson_list_to_csv_metajson(metajson_list)
    else:
        logging.error("convert_metajson_list ERROR Not managed format: {}".format(output_format))
        return

    yield bundled
def parse_and_convert_file_list(input_file_path_list, input_format, output_format, source, rec_id_prefix, only_first_record, all_in_one_file):
    """Parse and convert every file of ``input_file_path_list``.

    Each path is handed to ``parse_and_convert_file`` and the per-file
    results are accumulated into one flat list.

    Returns:
        * ``None`` when no file produced any result;
        * the flat list of converted records when ``all_in_one_file`` is
          falsy;
        * otherwise a single wrapping document: a MetaJSON collection for
          ``FORMAT_METAJSON`` / ``FORMAT_HTML``, or a MODS collection tree
          for ``FORMAT_MODS``. Any other output format logs an error and
          returns ``None``.
    """
    converted = []
    for path in input_file_path_list:
        # The last positional argument is always False here; presumably it is
        # the per-file "all in one file" switch, since merging is done below.
        # TODO confirm against parse_and_convert_file.
        per_file = parse_and_convert_file(
            path, input_format, output_format, source,
            rec_id_prefix, only_first_record, False)
        if not per_file:
            continue
        converted.extend(per_file)

    if not converted:
        return None
    if not all_in_one_file:
        return converted

    if output_format == constants.FORMAT_METAJSON or output_format == constants.FORMAT_HTML:
        return metajson_service.create_collection(None, None, converted)
    if output_format == constants.FORMAT_MODS:
        return mods_crosswalk.create_mods_collection_xmletree(converted)

    logging.error("parse_and_convert_file_list: ERROR Not managed format: {}".format(output_format))
    return None
def test():
    """Manual smoke test: run one Summon query and print the result.

    The Summon response is converted with ``crosswalks_service.convert_json``
    (input format ``constants.FORMAT_SUMMONJSON``, source ``"summon"``), the
    converted records are wrapped in a collection with id ``"summon_test"``
    and the collection is printed via ``jsonbson.dumps_json``.
    """
    data_dir = os.path.join(os.getcwd(), "data")
    # A parenthesised single-argument print outputs the same text as the
    # Python 2 print statement.
    print("base_dir: {}".format(data_dir))

    # Alternative sample queries, kept for manual experiments. To try one,
    # assign it to query_string in place of the active value below.
    #
    # Facet filter combinations:
    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook:
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"

    # Query that is actually sent.
    query_string = "s.q=Organized+Crime+and+States"

    response = summon.summon_query(query_string)
    records = crosswalks_service.convert_json(
        response,
        constants.FORMAT_SUMMONJSON,
        "summon",
        False,
    )
    summon_collection = metajson_service.create_collection(
        "summon_test", "Summon Test", records)
    # NOTE(review): the True flag presumably asks for pretty-printed JSON;
    # confirm against jsonbson.dumps_json.
    print(jsonbson.dumps_json(summon_collection, True))
def write_metajson_collection(col_id, col_title, items, output_file_path):
    """Wrap ``items`` in a MetaJSON collection and write it as JSON.

    The collection is built by ``metajson_service.create_collection`` from
    ``col_id``, ``col_title`` and ``items``, then passed to ``write_json``
    together with ``output_file_path``. Nothing is created or written when
    ``items`` is falsy.
    """
    if not items:
        return
    write_json(
        metajson_service.create_collection(col_id, col_title, items),
        output_file_path,
    )