def set_item_param(self, item, key, value):
		"""
		Sets item param, applying additional conversion if needed.

		The conversion depends on which config.parser group contains the key:
		  * list_params, keyword_list_params: the value is split into a list
		    by utils.strip_split_list using config.parser.list_sep;
		  * file_list_params: same split; additionally the size of every
		    listed file is stored under const.FILE_SIZE_PARAM (0 is stored
		    for a file that is not accessible, and a warning is logged);
		  * int_params: the value is converted to int, but only when it
		    consists of decimal digits (otherwise the later groups are
		    still tried);
		  * year_params: the three results of utils.parse_year are stored
		    under the key extended with the start / end / circa suffixes,
		    while the value itself is left unchanged;
		  * date_params: the value is parsed with config.parser.date_format.

		A ValueError raised during conversion is reported through
		self.raise_error(). The resulting value is then stored under key.
		"""
		try:
			if key in config.parser.list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
			elif key in config.parser.file_list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
				filesize_value = []
				for single_filename in value:
					#filenames start from slash, trimming it
					#(otherwise os.path.join would discard elibrary_dir)
					abspath = os.path.join(config.www.elibrary_dir, single_filename[1:])
					if os.path.isfile(abspath):
						filesize_value.append(os.path.getsize(abspath))
					else:
						#logging.warn is a deprecated alias, using logging.warning
						logging.warning("File is not accessible: %s", abspath)
						#keeping sizes aligned with filenames
						filesize_value.append(0)
				item.set(const.FILE_SIZE_PARAM, filesize_value)
			elif key in config.parser.keyword_list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
			elif (key in config.parser.int_params) and value.isdecimal():
				value = int(value)
			elif key in config.parser.year_params:
				(year_from, year_to, year_circa) = utils.parse_year(value)
				item.set(key + config.parser.start_suffix, year_from)
				item.set(key + config.parser.end_suffix, year_to)
				item.set(key + config.parser.circa_suffix, year_circa)
			elif key in config.parser.date_params:
				value = datetime.datetime.strptime(value, config.parser.date_format)

		except ValueError:
			#presumably raises a parsing error with context - defined outside this block
			self.raise_error()

		item.set(key, value)
Esempio n. 2
0
	def set_item_param(self, item, key, value):
		"""
		Sets item param, applying additional conversion if needed.

		Keys listed in config.parser.latex_params are first checked with
		utils.validate_latex and then converted with utils.parse_latex.

		After that the conversion depends on which config.parser group
		contains the key:
		  * list_params: the value is split into a list by
		    utils.strip_split_list using config.parser.list_sep;
		  * file_list_params: same split; additionally the size of every
		    listed file is stored under const.FILE_SIZE_PARAM (0 is stored
		    for a file that is not accessible, and a warning is logged);
		  * keyword_list_params: same split; additionally a boolean flag
		    is stored under "useful_" + key;
		  * int_params: the value is converted to int;
		  * year_params: the three results of utils.parse_year are stored
		    under the key extended with the start / end / circa suffixes,
		    while the value itself is left unchanged;
		  * date_params: the value is parsed with config.parser.date_format.

		A ValueError raised during conversion is reported through
		self.raise_error(). The resulting value is then stored under key.
		"""
		if key in config.parser.latex_params:
			utils.validate_latex(item, key, value)
			value = utils.parse_latex(value)

		try:
			if key in config.parser.list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
			elif key in config.parser.file_list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
				filesize_value = []
				for single_filename in value:
					#filenames start from slash, trimming it
					#(otherwise os.path.join would discard elibrary_dir)
					abspath = os.path.join(config.www.elibrary_dir, single_filename[1:])
					if os.path.isfile(abspath):
						filesize_value.append(os.path.getsize(abspath))
					else:
						#logging.warn is a deprecated alias, using logging.warning
						logging.warning("File is not accessible: %s", abspath)
						#keeping sizes aligned with filenames
						filesize_value.append(0)
				item.set(const.FILE_SIZE_PARAM, filesize_value)
			elif key in config.parser.keyword_list_params:
				value = utils.strip_split_list(value, config.parser.list_sep)
				#NOTE(review): the flag is True when EVERY keyword belongs to
				#useless_keywords, which reads as the opposite of "useful".
				#Behaviour is kept as is - confirm against the consumers
				#of "useful_<key>" before changing it
				useful_keywords = (set(value) <= config.parser.useless_keywords)
				item.set("useful_" + key, useful_keywords)
			elif key in config.parser.int_params:
				value = int(value)
			elif key in config.parser.year_params:
				(year_from, year_to, year_circa) = utils.parse_year(value)

				item.set(key + config.parser.start_suffix, year_from)
				item.set(key + config.parser.end_suffix, year_to)
				item.set(key + config.parser.circa_suffix, year_circa)
			elif key in config.parser.date_params:
				value = datetime.datetime.strptime(value, config.parser.date_format)

		except ValueError:
			#presumably raises a parsing error with context - defined outside this block
			self.raise_error()

		item.set(key, value)
	def set_item_param(self, item: BibItem, key: str, value: str):
		"""
		Stores a single param on the item, converting the raw value first.

		The raw value always goes through utils.parse_latex. The result is
		then converted according to the config.parser group containing key:
		  * list_params: split into a list by utils.strip_split_list
		    using config.parser.list_sep;
		  * int_params: converted to int;
		  * year_params: the three results of utils.parse_year are stored
		    under the key extended with the start / end / circa suffixes,
		    the value itself is not converted any further;
		  * date_params: parsed with config.parser.date_format.

		A ValueError raised during conversion is reported through
		self.raise_error(). The resulting value is then stored under key.
		"""
		converted = utils.parse_latex(value)

		try:
			parser_cfg = config.parser
			if key in parser_cfg.list_params:
				converted = utils.strip_split_list(converted, parser_cfg.list_sep)
			elif key in parser_cfg.int_params:
				converted = int(converted)
			elif key in parser_cfg.year_params:
				#unpacking into exactly three names is part of the validation
				first_year, last_year, circa = utils.parse_year(converted)

				item.set(key + parser_cfg.start_suffix, first_year)
				item.set(key + parser_cfg.end_suffix, last_year)
				item.set(key + parser_cfg.circa_suffix, circa)
			elif key in parser_cfg.date_params:
				converted = datetime.datetime.strptime(
					converted,
					parser_cfg.date_format
				)
		except ValueError:
			self.raise_error()

		item.set(key, converted)
Esempio n. 4
0
def _first_tag(mutagen_file, keys):
    """
    Return the first non-None value found by trying *keys* in order.

    Each key is looked up with ``mutagen_file.get``. A lookup that raises
    ValueError is treated the same as a missing tag. Returns None when no
    key produces a value.
    """
    for key in keys:
        try:
            value = mutagen_file.get(key)
        except ValueError:
            # Some file types reject keys they cannot represent: the
            # original code already had to guard the "\xa9day" lookup for
            # Ogg Opus files. The same applies to any other non-ASCII key,
            # so a rejected key simply means "try the next spelling".
            continue
        if value is not None:
            return value
    return None


def retrieve_metadata(mutagen_file, filename):
    """
    Collect the basic tags of an audio file loaded through mutagen.

    Every tag is searched under several alternative key spellings
    (apparently one per tag format -- confirm against the supported file
    types); the first spelling that yields a value wins.

    Args:
        mutagen_file: loaded file object; only its ``get`` method and its
            class name are used here.
        filename: used only in the diagnostic messages printed when the
            date or the track number is missing.

    Returns:
        Tuple ``(album, album_artist, date, codec, title, tracknum)``.
        ``date`` is always a string ("" when no date tag was found).
        ``codec`` is the class name of ``mutagen_file`` with "MP4", "WAVE"
        and "OggOpus" renamed to "M4A", "WAV" and "OPUS".
        ``tracknum`` is a string zero-padded to two characters when a
        track number was found; otherwise it is whatever ``fix_index``
        returned.
    """
    # Album artist
    album_artist = fix_index(
        _first_tag(mutagen_file, ("albumartist", "TPE2", "aART", "ALBUM ARTIST"))
    )

    # Year
    # TODO ensure discovery date exists
    date = _first_tag(mutagen_file, ("date", "TDRC", "\xa9day", "DATE"))
    if date is not None:
        date = parse_year(str(fix_index(date)))
    else:
        print(f"No date loaded for: {filename}")
        date = ""

    # Codec: class name of the loaded file, with a few names remapped
    codec_aliases = {"MP4": "M4A", "WAVE": "WAV", "OggOpus": "OPUS"}
    codec = type(mutagen_file).__name__
    codec = codec_aliases.get(codec, codec)

    # Album ("\xa9alb" is the ©alb key)
    album = fix_index(
        _first_tag(mutagen_file, ("album", "TALB", "\xa9alb", "ALBUM"))
    )

    # Title ("\xa9nam" is the ©nam key)
    title = fix_index(
        _first_tag(mutagen_file, ("title", "TIT2", "\xa9nam", "TITLE"))
    )

    # Track Number
    tracknum = fix_index(
        _first_tag(mutagen_file, ("tracknumber", "TRCK", "trkn"))
    )
    if tracknum is not None:
        if codec == "M4A":
            # For M4A the value is indexable and only its first element
            # is kept (presumably a (track, total) pair -- confirm).
            tracknum = tracknum[0]
        tracknum = str(tracknum).zfill(2)
    else:
        print(f"No track number loaded for: {filename}")

    return album, album_artist, str(date), codec, title, tracknum
Esempio n. 5
0
parser.add_argument('--graph_path', help='path to citation-cited csv')

args = parser.parse_args()
# Log to stdout. The handler for sys.stdout used to be constructed and
# immediately discarded, so it never took effect; passing the stream to
# basicConfig is what actually attaches it to the root logger.
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%d-%b-%y %H:%M:%S',
                    level=logging.INFO,
                    stream=sys.stdout)

if __name__ == '__main__':

    # Step 1: accumulate the paper -> year mapping over the dataset files
    # whose names start with 'P' or 'RM'.
    paper_year_dict = {}
    logging.info('Parsing Year from dataset')

    for entry_name in os.listdir(args.dataset):
        if entry_name.startswith(('P', 'RM')):
            # os.path.join instead of string concatenation, so --dataset
            # works with or without a trailing path separator.
            paper_year_dict = utils.parse_year(
                os.path.join(args.dataset, entry_name),
                paper_year_dict)

    logging.info('Serialising Paper- Year Dictionary')
    utils.dump_file(args.dumps, 'paper_year_dict', paper_year_dict)

    # Step 2: build the citation graph from the csv and the year mapping.
    logging.info('Parsing Dataset')

    global_citation_graph = utils.create_graph(args.graph_path,
                                               paper_year_dict)
    logging.info('Serialising Global Citation Graph')
    utils.dump_file(args.dumps, 'global_citation_graph_full',
                    global_citation_graph)

    # Step 3: post-process the graph with utils.remove_cycles.
    logging.info('Removing Cycles')
    global_citation_graph = utils.remove_cycles(global_citation_graph)