Esempio n. 1
0
def run(medline_path, clean, start, end, PROCESSES):
    """Parse the MEDLINE files found under ``medline_path`` in parallel.

    The directory tree is walked recursively; every file whose last
    extension is ``.xml`` or ``.gz`` is collected, the paths are sorted, and
    the slice ``paths[start:end]`` is mapped over ``_start_parser`` by a
    multiprocessing pool. The call blocks until all files are processed.

    Args:
        medline_path: Root directory searched for input files.
        clean: When truthy, ``PubMedDB.create_tables`` is called before
            ``PubMedDB.init``.
        start: Lower slice bound into the sorted path list (used as given).
        end: Upper slice bound; converted with ``int()`` unless it is None.
        PROCESSES: Number of worker processes passed to ``Pool``.

    Note:
        The database name comes from the module-level ``db`` variable.
        ``map_async`` hands only the file path to ``_start_parser``, so the
        name cannot be passed along as an argument here.
    """
    if end is not None:
        end = int(end)

    if clean:
        PubMedDB.create_tables(db)

    PubMedDB.init(db)

    wanted_extensions = (".xml", ".gz")
    paths = []
    for root, _dirs, files in os.walk(medline_path):
        for filename in files:
            if os.path.splitext(filename)[-1] in wanted_extensions:
                paths.append(os.path.join(root, filename))

    paths.sort()

    pool = Pool(processes=PROCESSES)  # start with processors
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Initialized with  %s processes" % (PROCESSES,))
    try:
        # .get() blocks until every path is parsed and re-raises the first
        # worker exception, if any.
        pool.map_async(_start_parser, paths[start:end]).get()
    except BaseException:
        # Do not leave workers running behind a failed or interrupted run.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()

    print("######################")
    print("###### Finished ######")
    print("######################")
Esempio n. 2
0
def run(medline_path, clean, start, end, PROCESSES):
    """Run ``_start_parser`` over the MEDLINE files below ``medline_path``.

    Files are discovered with ``os.walk``; only those whose final extension
    is ``.xml`` or ``.gz`` are kept. The sorted list is sliced with
    ``[start:end]`` and distributed over a pool of worker processes. The
    function returns only after every selected file has been processed.

    Args:
        medline_path: Directory that is searched recursively for input files.
        clean: If truthy, ``PubMedDB.create_tables`` runs before
            ``PubMedDB.init``.
        start: First index of the sorted path list to process (used as given).
        end: Index at which to stop; ``None`` means no upper bound, any other
            value is passed through ``int()``.
        PROCESSES: Size of the worker pool.

    Note:
        ``db`` is read from module scope. Only the path reaches
        ``_start_parser`` through ``map_async``, so the database name has to
        be available as a global.
    """
    if end is not None:
        end = int(end)

    if clean:
        PubMedDB.create_tables(db)

    PubMedDB.init(db)

    accepted = (".xml", ".gz")
    paths = []
    for root, _dirs, files in os.walk(medline_path):
        paths.extend(
            os.path.join(root, filename)
            for filename in files
            if os.path.splitext(filename)[-1] in accepted
        )
    paths.sort()

    pool = Pool(processes=PROCESSES)  # start with processors
    # Formatted into one string so the output is identical on Python 2 and 3.
    print("Initialized with  %s processes" % (PROCESSES,))
    try:
        # Blocks until all work is done; a worker exception is re-raised here.
        pool.map_async(_start_parser, paths[start:end]).get()
    except BaseException:
        # Stop the workers right away instead of leaking them on failure.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()

    print("######################")
    print("###### Finished ######")
    print("######################")
Esempio n. 3
0
    def __init__(self, filepath, db_name_input='pubmed'):
        """Store the file path and open a connection-bound database session.

        ``PubMedDB.init`` is called with ``db_name_input``; a connection is
        opened on the returned engine and a session bound to that connection
        is kept on the instance.

        Args:
            filepath: Stored unchanged as ``self.filepath``.
            db_name_input: Database name handed to ``PubMedDB.init``.
        """
        # TODO make way to pass db name as well
        engine, _ = PubMedDB.init(db_name_input)

        self.filepath = filepath
        self.connection = engine.connect()

        # The factory is bound to the engine, but the session itself is
        # explicitly bound to the connection opened above.
        session_factory = sessionmaker(bind=engine)
        self.session = session_factory(bind=self.connection)
Esempio n. 4
0
 def __init__(self, filepath, db):
     """Remember ``filepath`` and create a session on the ``db`` engine.

     Args:
         filepath: Stored unchanged as ``self.filepath``.
         db: Database name passed to ``PubMedDB.init``.
     """
     db_engine, _ = PubMedDB.init(db)
     make_session = sessionmaker(bind=db_engine)
     self.filepath = filepath
     self.session = make_session()
Esempio n. 5
0
 def __init__(self, filepath, db):
     """Keep the input path and open a session for database ``db``.

     Args:
         filepath: Stored unchanged as ``self.filepath``.
         db: Database name passed to ``PubMedDB.init``.
     """
     # Only the engine from PubMedDB.init is needed; the second value is unused.
     bound_engine, _ = PubMedDB.init(db)
     session_factory = sessionmaker(bind=bound_engine)
     self.filepath = filepath
     self.session = session_factory()
Esempio n. 6
0
 def __init__(self, db_name_input):
     """Initialise the database once and keep its name and engine.

     Args:
         db_name_input: Database name passed to ``PubMedDB.init`` and
             stored as ``self.db_name``.
     """
     # Only make a single database connection pool
     engine, _ = PubMedDB.init(db_name_input)
     self.db_name = db_name_input
     self.db_engine = engine