def create_title_bool_and(record: Dict[str, str]) -> str:
    if 'Subtitle' in record and record['Subtitle'] not in ['N/A', '']:
        query_str = '(' + ')+AND+('.join([
            normalize(str(record['Main Title'])),
            normalize(str(record['Subtitle']))
        ]) + ')'
    else:
        query_str = normalize(record['Main Title'])
    # logger.debug('Title Boolean phrase or string: ' + query_str)
    return query_str
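
# `normalize` is defined elsewhere in this module and is not shown here. It is
# assumed to clean a string for safe use in a query (drop query-breaking
# punctuation, collapse whitespace). A minimal sketch of that idea, under that
# assumption -- illustrative only, not the actual implementation:
def _normalize_sketch(value: str) -> str:
    import re  # local import keeps the sketch self-contained
    # Drop characters that commonly break Boolean query syntax, then
    # collapse runs of whitespace into single spaces.
    cleaned = re.sub(r'["()&+|]', ' ', value)
    return ' '.join(cleaned.split())
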
def look_up_book_in_worldcat(book_dict: Dict[str, str]) -> pd.DataFrame:
    # Generate query string
    full_title = create_full_title(book_dict)
    logger.info(f'Looking for "{full_title}" in WorldCat...')
    # Data currently has one author last name; otherwise I'd do what's
    # commented below or process a one-to-many relationship.
    # query_author = normalize(f"{book_dict['Author_First']} {book_dict['Author_Last']}")
    # Replacing apostrophes because they break query strings when they occur
    query_author = book_dict['Author_Last'].replace("'", " ")
    query_title = normalize(full_title)
    query_str = f'srw.ti all "{query_title}" and srw.au all "{query_author}"'
    logger.debug(query_str)
    params = {
        'wskey': WC_API_KEY,
        'query': query_str,
        'maximumRecords': 100,
        'frbrGrouping': 'off'
    }
    result = make_request_using_cache(WC_BIB_BASE_URL, params)
    if not result:
        return pd.DataFrame({})
    records = parse_marcxml(result)
    records_df = pd.DataFrame(records)
    logger.info(f'Number of WorldCat records found: {len(records_df)}')
    logger.debug(records_df.head(10))
    return records_df
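
# `make_request_using_cache` (defined elsewhere) is assumed to be a thin
# caching wrapper around an HTTP GET, keyed on URL plus sorted params, so
# repeated lookups don't hit the API twice. A minimal sketch under that
# assumption (illustrative only; the real helper may also persist its cache):
_REQUEST_CACHE: dict = {}

def _make_request_using_cache_sketch(base_url: str, params: dict) -> str:
    import json
    import requests
    # Build a stable cache key from the URL and the sorted parameters.
    key = base_url + json.dumps(params, sort_keys=True, default=str)
    if key not in _REQUEST_CACHE:
        resp = requests.get(base_url, params=params)
        # Cache the body on success, an empty string otherwise, so callers
        # can treat a falsy result as "no data".
        _REQUEST_CACHE[key] = resp.text if resp.ok else ''
    return _REQUEST_CACHE[key]
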
def process_point_cloud(source_path: str, save_dir: str) -> None:
    """Normalize one .pcd file, reconstruct its surface, and extract PoV images."""
    print("Reading from file %s." % source_path)
    name, _ = os.path.splitext(os.path.basename(source_path))
    out_dir = os.path.join(save_dir, name)

    pts = load_point_cloud(source_path)
    x, y, z = normalize(pts)

    # Create folder in save_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    normalized_pcd = os.path.join(out_dir, name + '_normalized.pcd')
    save_point_cloud(pts, normalized_pcd)

    # Generate VTK file for reconstructed surface
    render3D(normalized_pcd, show=False)
    subprocess.check_call([
        './extract_pov',
        os.path.join(out_dir, name + '_normalized_output.vtk'),
        out_dir + '/',
        str(x[0]), str(x[1]), str(x[2]),
        str(y[0]), str(y[1]), str(y[2]),
        str(z[0]), str(z[1]), str(z[2])
    ])
    print('Extracted PoV images to %s' % out_dir)


def main():
    if len(sys.argv) != 3:
        print("Usage:\npython extract_pov.py [SOURCE FILE/FOLDER] [SAVE_DIR]")
        exit(1)
    source = sys.argv[1]
    save_dir = sys.argv[2]

    if not os.path.exists(save_dir):
        print("Creating directory %s." % save_dir)
        os.makedirs(save_dir)

    if os.path.isdir(source):
        print("Reading from directory %s." % source)
        # Walk through the directory and process every .pcd file found
        for root, _, files in os.walk(source):
            for f in files:
                if os.path.splitext(f)[1] == '.pcd':
                    process_point_cloud(os.path.join(root, f), save_dir)
    else:
        if os.path.splitext(source)[1] != '.pcd':
            print('Invalid file.')
            exit(1)
        process_point_cloud(source, save_dir)
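
# Example invocation (paths are illustrative):
#
#   python extract_pov.py ./scans ./output            # process every .pcd under ./scans
#   python extract_pov.py ./scans/chair.pcd ./output  # process a single file
#
# Each input cloud gets its own subfolder under SAVE_DIR containing the
# normalized .pcd, the reconstructed-surface .vtk, and the extracted PoV images.
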
def look_up_book_in_resource(book_dict: Dict[str, str]) -> pd.DataFrame:
    # Generate query string
    # logger.info(f'Looking for {book_dict["Main Title"]} in Harvard LibraryCloud...')
    query_author = normalize(
        f"{book_dict['Author 1 Given']} {book_dict['Author 1 Initial']} {book_dict['Author 1 Family']}"
    )
    # query_author = book_dict['authorLast']
    # Replace apostrophes because they break query strings; str.replace returns
    # a new string, so the result must be reassigned.
    query_author = query_author.replace("'", " ")
    title_bool_and = create_title_bool_and(book_dict)
    params = {
        'title': title_bool_and,
        'name': query_author,
        'limit': 10,
        'publisher': book_dict['Publisher']
    }
    query_str = '&'.join(f'{k}={v}' for k, v in params.items())
    # logger.debug(query_str)

    records = {}
    result = make_request_using_cache(BIB_BASE_URL, params)
    if result:
        records.update(parse_modsxml(result, book_dict))
    if len(records) == 0:
        # Nothing found: retry without the publisher filter
        params.pop('publisher')
        result = make_request_using_cache(BIB_BASE_URL, params)
        if result:
            records.update(parse_modsxml(result, book_dict))
        if book_dict['Publisher'] != book_dict['Copyright Holder']:
            # Some records list the copyright holder as the publisher
            params['publisher'] = book_dict['Copyright Holder']
            second_result = make_request_using_cache(BIB_BASE_URL, params)
            if second_result:
                second_records = parse_modsxml(second_result, book_dict)
                if len(second_records) > 0:
                    records.update(second_records)
    # print(records)
    # records.update(use_isbnlib({book_dict['ID']: book_dict}))

    # A record set counts as categorized if any record carries a typed ISBN...
    categorized = False
    for r in records.values():
        for col in ['ebook ISBN', 'paper ISBN', 'hardcover ISBN']:
            if col in r and len(r[col]) > 0:
                categorized = True
    # ...unless an overlong uncategorized ISBN string suggests a bad parse.
    for r in records.values():
        if len(str(r['Uncategorized ISBN'])) > 40:
            categorized = False
    # if not categorized:
    #     records.update(use_isbnlib(records))

    if not records:
        return pd.DataFrame({})
    # records.update(use_isbnlib(records))
    records_df = pd.DataFrame.from_dict(records, orient='index')
    # logger.info(f'Number of records found: {len(records_df)}')
    # logger.debug(records_df.head(10))
    return records_df
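
# Example usage (values are illustrative; the keys follow the spreadsheet
# columns referenced above; parse_modsxml may consult further columns):
#
# book = {
#     'ID': '1',
#     'Main Title': 'An Example Title',
#     'Subtitle': 'N/A',
#     'Author 1 Given': 'Jane',
#     'Author 1 Initial': 'Q',
#     'Author 1 Family': 'Doe',
#     'Publisher': 'Example Press',
#     'Copyright Holder': 'Example Press',
# }
# matches = look_up_book_in_resource(book)
# print(f'{len(matches)} candidate record(s) found')
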