def main():
    """Command-line entry point: extract trips from a JSON file of waypoints.

    Reads the file named by ``--source``, converts its contents to waypoints
    and prints the extracted trips to stdout as JSON.  ``--list`` hands all
    waypoints to ``WaypointListProcessor`` at once; ``--stream`` feeds them
    one by one to ``WaypointStreamProcessor``.  ``--list`` wins when both
    flags are given; with neither flag the usage help is printed.

    Raises:
        SystemExit: status 1 when the loaded data is not a list (the value
            returned by ``load_from_json_file`` is printed as the error).
            argparse itself exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='extract trips from a stream or list of Waypoints.')
    parser.add_argument('--stream', action='store_true',
                        help='extract trips from a stream of Waypoints')
    parser.add_argument('--list', action='store_true',
                        help='extract trips from a list of Waypoints')
    parser.add_argument('--source', dest='source',
                        help='data source file with valid json format',
                        required=True)
    args = parser.parse_args()

    data_json = load_from_json_file(args.source)
    if not isinstance(data_json, list):
        # Anything other than a list is reported as the error text.
        # NOTE(review): presumably load_from_json_file returns a message
        # string on failure -- confirm against its implementation.
        print("Error: %s" % data_json)
        parser.print_help()
        # Non-zero status so shells and scripts can detect the failure.
        raise SystemExit(1)
    waypoints = convert_data_to_waypoints(data_json)

    if args.list:
        list_processor = WaypointListProcessor(waypoints)
        trips = list_processor.get_trips()
        print(json.dumps(trip_waypoint_format(trips)))
    elif args.stream:
        stream_processor = WaypointStreamProcessor()
        results = (stream_processor.process_waypoint(waypoint)
                   for waypoint in waypoints)
        # Drop falsy results and keep a real list (not a one-shot iterator)
        # so the formatter can traverse or measure it on Python 3 as well.
        trips = [trip for trip in results if trip]
        print(json.dumps(trip_waypoint_format(trips)))
    else:
        parser.print_help()
# -*- coding: utf-8 -*- import codecs import re from string import whitespace from utils import split_subfields, format_date, flatten_list, log_dir, load_from_json_file, dump_to_json_file caus = load_from_json_file('caus.json') languages = {'': '', 'Albanian': 'alb', 'Arabic': 'ara', 'Armenian': 'arm', 'Azerbaijani': 'aze', 'Bengali': 'ben', 'Bulgarian': 'bul', 'Burmese': 'bur', 'Catalan': 'cat', 'Chinese': 'chi', 'Czech': 'cze', 'Danish': 'dan', 'Dutch': 'dut', 'English': 'eng', 'Englsih': 'eng', 'Farsi': 'per', 'Finnish': 'fin', 'French': 'fre', 'France': 'fre', 'German': 'ger', 'Greek, Ancient (to 1453)': 'grc', 'Greek, Modern (1453-)': 'gre', 'Hausa': 'hau', 'Hebrew': 'heb', 'Hindi': 'hin', 'Hrv': 'hrv', 'Hungarian': 'hun', 'Icelandic': 'ice', 'Indonesian': 'ind', 'Italian': 'ita', 'Japanese': 'jpn', 'Kazakh': 'kaz', 'Korean': 'kor', 'Latin': 'lat', 'Latvian': 'lav', 'Lithuanian': 'lit', 'Macedonian': 'mac', 'Malay': 'mal', 'Malaysian': 'mal', 'Multilingual': 'mul', 'Multiple languages': 'mul', 'Nepali': 'nep', 'Norwegian': 'nor', 'Persian, Modern': 'per', 'Polish': 'pol', 'Portuguese': 'por', 'Romanian': 'rum', 'Rumanian': 'rum', 'Russian': 'rus', 'Serbo-Croatian': '---', 'Serbo-Croatian (Cyrillic)': 'srp', 'Serbo-Croatian (Roman)': 'hrv', 'Singhalese': 'sin', 'Slavic (Other)': 'sla', 'Slovak': 'slo', 'Slovenian': 'slv', 'Somali': 'som', 'Spanish': 'spa', 'Swahili': 'swa', 'Swedish': 'swe', 'Tagalog': 'tag', 'Tamil': 'tam', 'Thai': 'tha', 'Turkish': 'tur', 'Ukrainian': 'ukr', 'Undetermined': 'und', 'Urdu': 'urd', 'Uzbek': 'uzb', 'Vietnamese': 'vie', 'Welsh': 'wel', 'Zulu': 'zul', 'ru': 'rus'} labels = { # "aecl reference number": lambda x: None, # "aecl report number": lambda x: None, "afcen no.": lambda x: {"tag": "088", "ind1": " ", "ind2": " ", "subs": {"a" : x}}, # "aisbn": lambda x: None, "asia report number": lambda x: {"tag": "088", "ind1": " ", "ind2": " ", "subs": {"a" : x}}, "bulletin number": lambda x: {"tag": "088", "ind1": " ", "ind2": " ", 
"subs": {"a" : x}}, # "cea report no.": lambda x: None, # "cnsc catalogue number": lambda x: None, "coden": lambda x: None, # "conference number": lambda x: None,
mapping['X926'] = lambda x: [{'tag': '926', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}] # "Access as of:" (summary of holdings) sp_mapping = mapping # sp_mapping['PUB'] = lambda x: [{'tag': '264', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}] #FIXME: split corporate authors sp_mapping['CAU'] = lambda x: [{'tag': '110', 'ind1': ' ', 'ind2': '2', 'subs': {'a': x}}] #split them formatting_config = {'sub': '|', 'tag_beg': '', 'tag_end': '%\n', 'rec_beg': '', 'rec_end': '\x1d\n', } fields = load_from_json_file('fields.json') tables = ['T.ACQFUNDS', 'T.ACQINVOICE', 'T.ACQISELRCI', 'T.ACQISELRCV', 'T.ACQITEMS', 'T.ACQPOS', 'T.APPUPDATE', 'T.APPUSERS', 'T.AV', 'T.AVUSE', 'T.BOILERPLAT', 'T.CAT', 'T.CATADDITEM', 'T.CATBAK', 'T.CATCIRC', 'T.CATIN', 'T.CATINS', 'T.CATLINK', 'T.CATMASTER', 'T.CATOUT', 'T.CATOUTS', 'T.CATOVERDUE', 'T.CATPARTS', 'T.CATPRO', 'T.CATPROADIT', 'T.CATPROANAL', 'T.CATPROBKS', 'T.CATPROSER', 'T.CATPROSP', 'T.CATPROTOC', 'T.CATRENEW', 'T.CATRENEWS', 'T.CATRES', 'T.CATSELREQ', 'T.CATSELREQA', 'T.CATSELREQC', 'T.CATSELREQW', 'T.CATSP', 'T.CATTITLES', 'T.CATTOC', 'T.CATWEB', 'T.CATWEB4', 'T.CATWEBDES', 'T.CATWEBSEL', 'T.CATWEBSRV', 'T.CLAIMINT', 'T.CLSAMPTHES', 'T.COLLECTION', 'T.COUNTERS', 'T.CURRENCY', 'T.DIVISION', 'T.EMAIL', 'T.EMAILDB', 'T.GLOBAL', 'T.GLOBAL1', 'T.GLOBAL2', 'T.GLOBAL3', 'T.GLOBAL4', 'T.HNPU', 'T.IAEACSBC', 'T.IAEAUSERS', 'T.INISCAT', 'T.INISCATV17', 'T.INISCATWEB', 'T.INISTHES', 'T.INVENTORY', 'T.ISSCODE', 'T.ISSDATES', 'T.ISSDATESEL', 'T.ISSN', 'T.LDOCKWS', 'T.LDOCKWSN', 'T.LIBSCFORMS', 'T.LIBSGLOBAL', 'T.LIBSSERVER', 'T.LIBTRACK', 'T.LIBTYPES', 'T.LIBVENDORS', 'T.LNUMTYPES', 'T.LREFREQS', 'T.LREFREQSC', 'T.LSELRESCAN', 'T.LSERVICES', 'T.LSERVICESP', 'T.LSERVICESR', 'T.LSERVICESW', 'T.LSERVNUMS', 'T.MARCTEMP', 'T.ORDERSTAT', 'T.ORDERTYPES', 'T.PICKCTRY', 'T.PICKCTRYED', 'T.PICKLANG', 'T.PICKLANGED', 'T.PICKSTEP', 'T.PROJECTS', 'T.PWSELECTS', 'T.RAINBOW', 'T.REGLIBUSE', 'T.REQTYPE', 'T.RESERVE', 'T.RESERVEC', 'T.RESPICK', 'T.SERHOLD', 
'T.SERHOLD2', 'T.SERIALS', 'T.SERIALS2', 'T.SERIALSBK', 'T.SERIALSC1', 'T.SERIALSC2', 'T.SERIALSC3', 'T.SERIALSDM', 'T.SERIALSID', 'T.SERIALSID2', 'T.SERIALSIN', 'T.SERIALSNE', 'T.SERIALSNE2', 'T.SERIALSNT', 'T.SERIALSNT2', 'T.SERIALSUNX', 'T.SERROUTE', 'T.SERROUTE2', 'T.SERSELEOYR', 'T.SERSELHOLD', 'T.SERSELVUP', 'T.SFREQS', 'T.SP', 'T.STAFPUB', 'T.STATISTIC', 'T.STATS', 'T.SUBINFO', 'T.SUBSNO', 'T.TOPIC', 'T.TUNCAT', 'T.TUNCATTITL', 'T.TUNCATWEB4', 'T.TUNTITLES', 'T.VALDB', 'T.VALIDATION', 'T.WAITCODES', 'T.WEBDES', 'T.WEBPROFILE', 'T.WEBSAMPTHS', 'T.WEBSERVER', 'T.WEBTYPES', 'T.WHENX',