def run(medline_path, clean, start, end, PROCESSES):
    """Parse the MEDLINE files found under ``medline_path`` in a worker pool.

    The database name is taken from the module-level global ``db``.  Per the
    original author's note, ``map_async`` only hands ``_start_parser`` a
    path, so ``db`` has to be set as a global (in ``__main__``) beforehand.

    Args:
        medline_path: Directory walked recursively for ``.xml``/``.gz`` files.
        clean: If true, ``PubMedDB.create_tables(db)`` is called first.
        start: Lower slice bound into the sorted list of discovered paths.
        end: Upper slice bound.  ``None`` means "up to the last path"; any
            other value is converted with ``int()``.
        PROCESSES: Worker count passed to ``Pool(processes=...)``.
    """
    if end is not None:
        end = int(end)

    if clean:
        PubMedDB.create_tables(db)
    PubMedDB.init(db)

    wanted_suffixes = (".xml", ".gz")
    paths = []
    for root, _dirs, files in os.walk(medline_path):
        paths.extend(
            os.path.join(root, filename)
            for filename in files
            if os.path.splitext(filename)[-1] in wanted_suffixes
        )
    # Sort so that the start:end slice selects the same files on every run.
    paths.sort()

    pool = Pool(processes=PROCESSES)
    try:
        # Single-argument print() emits the same text as the old print
        # statement (including its double space) on Python 2 and 3 alike.
        print("Initialized with  {0} processes".format(PROCESSES))
        # For debugging without multiprocessing, call _start_parser(path)
        # serially for each path instead.
        pool.map_async(_start_parser, paths[start:end]).get()
    except BaseException:
        # Stop outstanding work right away; the error is re-raised unchanged.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Reap the worker processes instead of leaking them.
        pool.join()

    print("######################")
    print("###### Finished ######")
    print("######################")
def run(medline_path, clean, start, end, PROCESSES):
    """Collect MEDLINE input files and parse them with a process pool.

    Reads the module-level global ``db`` for the database name.  The
    original note explains why: ``map_async`` cannot pass ``db`` on to
    ``_start_parser``, so it is set as a global in ``__main__``.

    Args:
        medline_path: Root directory searched recursively for input files
            whose extension is ``.xml`` or ``.gz``.
        clean: When true, ``PubMedDB.create_tables(db)`` runs before parsing.
        start: Start index of the slice taken from the sorted path list.
        end: End index of that slice; ``None`` leaves it open, otherwise the
            value is passed through ``int()``.
        PROCESSES: Number of processes requested from ``Pool``.
    """
    if end is not None:
        end = int(end)

    if clean:
        PubMedDB.create_tables(db)
    PubMedDB.init(db)

    accepted = (".xml", ".gz")
    paths = []
    for root, _dirs, files in os.walk(medline_path):
        for filename in files:
            if os.path.splitext(filename)[-1] in accepted:
                paths.append(os.path.join(root, filename))
    # A sorted list keeps the start:end window stable between runs.
    paths.sort()

    pool = Pool(processes=PROCESSES)
    try:
        # One pre-formatted string prints identically under Python 2's print
        # statement and Python 3's print function (double space preserved).
        print("Initialized with  {0} processes".format(PROCESSES))
        # To run without multiprocessing, loop over the paths and call
        # _start_parser(path) directly.
        pool.map_async(_start_parser, paths[start:end]).get()
    except BaseException:
        # Abort remaining tasks immediately, then propagate the error.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Always wait for the workers so no child processes are left behind.
        pool.join()

    print("######################")
    print("###### Finished ######")
    print("######################")
def __init__(self, filepath, db_name_input='pubmed'):
    """Store the input path and open a session on a dedicated connection.

    The engine comes from ``PubMedDB.init(db_name_input)``.  A connection is
    opened on it and kept as ``self.connection``; ``self.session`` is then
    created with that connection as its bind.
    """
    # TODO (carried over from the original): "make way to pass db name as
    # well" -- db_name_input appears to cover this already; confirm and drop.
    engine, _unused_base = PubMedDB.init(db_name_input)
    self.filepath = filepath
    self.connection = engine.connect()
    # The factory is configured with the engine, while the session itself is
    # created with bind=self.connection.
    self.session = sessionmaker(bind=engine)(bind=self.connection)
def __init__(self, filepath, db):
    """Remember ``filepath`` and create a session for database ``db``.

    ``PubMedDB.init(db)`` returns a pair; only its first element (the
    engine) is used here, as the bind for the session factory.
    """
    engine, _unused_base = PubMedDB.init(db)
    self.filepath = filepath
    # Build the factory and instantiate one session from it in one step.
    self.session = sessionmaker(bind=engine)()
def __init__(self, filepath, db):
    """Keep the file path and open a session against database ``db``.

    The engine returned first by ``PubMedDB.init(db)`` is bound to a
    ``sessionmaker`` factory, from which ``self.session`` is created.  The
    second returned value is not needed in this method.
    """
    db_engine, _unused_base = PubMedDB.init(db)
    make_session = sessionmaker(bind=db_engine)

    self.filepath = filepath
    self.session = make_session()
def __init__(self, db_name_input):
    """Record the database name and the engine obtained for it.

    ``PubMedDB.init(db_name_input)`` is called once here and its first
    return value is stored as ``self.db_engine``.  The original comment
    states the intent: only make a single database connection pool.
    """
    # The second element of the returned pair is not used in this method.
    self.db_engine, _unused_base = PubMedDB.init(db_name_input)
    self.db_name = db_name_input