def set_item_param(self, item, key, value):
    """
    Sets item param, applying additional conversion if needed.

    The conversion is chosen by the first config.parser group that
    contains `key`:

    * list_params / keyword_list_params: value is split into a list with
      utils.strip_split_list using config.parser.list_sep;
    * file_list_params: value is split the same way, and a parallel list
      of file sizes (0 for entries that are not regular files) is stored
      under const.FILE_SIZE_PARAM;
    * int_params: value is converted with int(), but only when it is a
      decimal string;
    * year_params: utils.parse_year(value) yields three values which are
      stored under `key` plus the start/end/circa suffixes; value itself
      is left unchanged;
    * date_params: value is parsed with config.parser.date_format.

    A ValueError raised during conversion is handed to self.raise_error().
    Afterwards the (possibly converted) value is stored under `key`.
    """
    try:
        if key in config.parser.list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
        elif key in config.parser.file_list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
            filesize_value = []
            for single_filename in value:
                # filenames start from slash, trimming it
                abspath = os.path.join(config.www.elibrary_dir, single_filename[1:])
                if os.path.isfile(abspath):
                    filesize_value.append(os.path.getsize(abspath))
                else:
                    # logging.warn is a deprecated alias (removed in 3.13)
                    logging.warning("File is not accessible: %s", abspath)
                    # keep sizes aligned with filenames
                    filesize_value.append(0)
            item.set(const.FILE_SIZE_PARAM, filesize_value)
        elif key in config.parser.keyword_list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
        elif (key in config.parser.int_params) and value.isdecimal():
            value = int(value)
        elif key in config.parser.year_params:
            (year_from, year_to, year_circa) = utils.parse_year(value)
            item.set(key + config.parser.start_suffix, year_from)
            item.set(key + config.parser.end_suffix, year_to)
            item.set(key + config.parser.circa_suffix, year_circa)
        elif key in config.parser.date_params:
            value = datetime.datetime.strptime(value, config.parser.date_format)
    except ValueError:
        self.raise_error()

    item.set(key, value)
def set_item_param(self, item, key, value):
    """
    Sets item param, applying additional conversion if needed.

    Keys listed in config.parser.latex_params are first validated with
    utils.validate_latex and then converted with utils.parse_latex.

    After that, the conversion is chosen by the first config.parser group
    that contains `key`:

    * list_params: value is split into a list with utils.strip_split_list
      using config.parser.list_sep;
    * file_list_params: value is split the same way, and a parallel list
      of file sizes (0 for entries that are not regular files) is stored
      under const.FILE_SIZE_PARAM;
    * keyword_list_params: value is split the same way, and a boolean is
      stored under "useful_" + key;
    * int_params: value is converted with int();
    * year_params: utils.parse_year(value) yields three values which are
      stored under `key` plus the start/end/circa suffixes; value itself
      is left unchanged;
    * date_params: value is parsed with config.parser.date_format.

    A ValueError raised during conversion is handed to self.raise_error().
    Afterwards the (possibly converted) value is stored under `key`.
    """
    if key in config.parser.latex_params:
        utils.validate_latex(item, key, value)
        value = utils.parse_latex(value)

    try:
        if key in config.parser.list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
        elif key in config.parser.file_list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
            filesize_value = []
            for single_filename in value:
                # filenames start from slash, trimming it
                abspath = os.path.join(config.www.elibrary_dir, single_filename[1:])
                if os.path.isfile(abspath):
                    filesize_value.append(os.path.getsize(abspath))
                else:
                    # logging.warn is a deprecated alias (removed in 3.13)
                    logging.warning("File is not accessible: %s", abspath)
                    # keep sizes aligned with filenames
                    filesize_value.append(0)
            item.set(const.FILE_SIZE_PARAM, filesize_value)
        elif key in config.parser.keyword_list_params:
            value = utils.strip_split_list(value, config.parser.list_sep)
            # NOTE(review): this is True when EVERY keyword is contained in
            # useless_keywords, which looks inverted for a "useful_" flag.
            # Left unchanged because consumers may rely on it; confirm intent.
            useful_keywords = (set(value) <= config.parser.useless_keywords)
            item.set("useful_" + key, useful_keywords)
        elif key in config.parser.int_params:
            value = int(value)
        elif key in config.parser.year_params:
            (year_from, year_to, year_circa) = utils.parse_year(value)
            item.set(key + config.parser.start_suffix, year_from)
            item.set(key + config.parser.end_suffix, year_to)
            item.set(key + config.parser.circa_suffix, year_circa)
        elif key in config.parser.date_params:
            value = datetime.datetime.strptime(value, config.parser.date_format)
    except ValueError:
        self.raise_error()

    item.set(key, value)
def set_item_param(self, item: BibItem, key: str, value: str):
    """
    Store `value` on `item` under `key`, converting it beforehand when the
    key belongs to one of the typed parameter groups of config.parser.

    The raw value always goes through utils.parse_latex first. List params
    are split with config.parser.list_sep, int params go through int(),
    date params are parsed with config.parser.date_format. Year params do
    not alter the stored value; instead the three results of
    utils.parse_year are stored under `key` plus the start, end and circa
    suffixes. Any ValueError from a conversion is passed on to
    self.raise_error().
    """
    converted = utils.parse_latex(value)

    try:
        if key in config.parser.list_params:
            converted = utils.strip_split_list(converted, config.parser.list_sep)
        elif key in config.parser.int_params:
            converted = int(converted)
        elif key in config.parser.year_params:
            # Unpack eagerly: a result of the wrong length must still surface
            # as ValueError inside this try block.
            first_year, last_year, circa = utils.parse_year(converted)
            year_parts = (first_year, last_year, circa)
            suffix_attrs = ("start_suffix", "end_suffix", "circa_suffix")
            # Suffixes are looked up one at a time, right before each store.
            for suffix_attr, year_part in zip(suffix_attrs, year_parts):
                item.set(key + getattr(config.parser, suffix_attr), year_part)
        elif key in config.parser.date_params:
            converted = datetime.datetime.strptime(
                converted, config.parser.date_format
            )
    except ValueError:
        self.raise_error()

    item.set(key, converted)
def _first_tag(mutagen_file, keys):
    """Return the first non-None value found under any of `keys`, else None."""
    for key in keys:
        try:
            found = mutagen_file.get(key)
        except ValueError:
            # Some tag containers reject keys they consider invalid instead of
            # reporting them as missing (seen with "\xa9..." keys on Ogg Opus
            # files); treat that the same as "tag not present".
            continue
        if found is not None:
            return found
    return None


def retrieve_metadata(mutagen_file, filename):
    """
    Extract album, album artist, year, codec, title and track number.

    Each field is looked up under several alternative tag keys, in order,
    and the first one present wins. Raw tag values are passed through
    fix_index (defined elsewhere in this file).

    Args:
        mutagen_file: tag container offering a dict-like ``get(key)``; its
            class name is used to derive the codec.
        filename: only used in the diagnostic messages printed when the
            date or the track number is missing.

    Returns:
        Tuple ``(album, album_artist, date, codec, title, tracknum)``.
        ``date`` is always a string ("" when no date tag was found).
        ``tracknum`` is a string zero-padded to two characters, or the
        result of fix_index(None) when no track number tag was found.
    """
    # Album artist
    album_artist = fix_index(
        _first_tag(mutagen_file, ("albumartist", "TPE2", "aART", "ALBUM ARTIST"))
    )

    # Year
    date = _first_tag(mutagen_file, ("date", "TDRC", "\xa9day", "DATE"))
    # TODO ensure discovery date exists
    if date is not None:
        date = parse_year(str(fix_index(date)))
    else:
        print(f"No date loaded for: {filename}")
        date = ""

    # Codec: derived from the container class name, with a few renames
    codec = type(mutagen_file).__name__
    codec = {"MP4": "M4A", "WAVE": "WAV", "OggOpus": "OPUS"}.get(codec, codec)

    # Album ("\xa9alb" is ©alb)
    album = fix_index(
        _first_tag(mutagen_file, ("album", "TALB", "\xa9alb", "ALBUM"))
    )

    # Title ("\xa9nam" is ©nam)
    title = fix_index(
        _first_tag(mutagen_file, ("title", "TIT2", "\xa9nam", "TITLE"))
    )

    # Track Number
    tracknum = fix_index(
        _first_tag(mutagen_file, ("tracknumber", "TRCK", "trkn"))
    )
    if tracknum is not None:
        if codec == "M4A":
            # M4A track numbers are indexable; the first element is the number
            tracknum = tracknum[0]
        tracknum = str(tracknum).zfill(2)
    else:
        print(f"No track number loaded for: {filename}")

    return album, album_artist, str(date), codec, title, tracknum
parser.add_argument('--graph_path', help='path to citation-cited csv')
args = parser.parse_args()

# NOTE(review): this handler is created and immediately discarded, so it has
# no effect; basicConfig below therefore logs to its default stream (stderr).
# If stdout was intended, pass the handler via basicConfig(handlers=[...]).
# Left as-is so the log destination does not change silently.
logging.StreamHandler(sys.stdout)
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    datefmt='%d-%b-%y %H:%M:%S',
    level=logging.INFO,
)

if __name__ == '__main__':
    # Step 1: accumulate the paper -> year mapping over all dataset files
    # whose names start with 'P' or 'RM'.
    paper_year_dict = {}
    logging.info('Parsing Year from dataset')
    for file in os.listdir(args.dataset):
        if file.startswith(('P', 'RM')):
            # os.path.join works whether or not --dataset ends with a
            # path separator (plain concatenation required one).
            paper_year_dict = utils.parse_year(
                os.path.join(args.dataset, file), paper_year_dict
            )

    logging.info('Serialising Paper- Year Dictionary')
    utils.dump_file(args.dumps, 'paper_year_dict', paper_year_dict)

    # Step 2: build the citation graph from the csv and persist it.
    logging.info('Parsing Dataset')
    global_citation_graph = utils.create_graph(args.graph_path, paper_year_dict)

    logging.info('Serialising Global Citation Graph')
    utils.dump_file(args.dumps, 'global_citation_graph_full', global_citation_graph)

    # Step 3: remove cycles from the graph.
    logging.info('Removing Cycles')
    global_citation_graph = utils.remove_cycles(global_citation_graph)