Example No. 1
def main(args):
    """
    Main function
    :param args: argparse dict
    :return: None
    """

    print "Start reindexing from {0} to {1} with batch size of {2} and {3} worker processes".format(
        args.source_index, args.destination_index, args.batch_size, args.processes
    )

    client = Elasticsearch()
    print "connected to elastic search at http://localhost:9200"

    docs = scan(client, index=args.source_index)

    count = 0

    queue = Queue(args.batch_size)  # don't fill up queue too much
    pool = Pool(args.processes, worker_main, (queue, args.source_index, args.destination_index, args.batch_size))

    for doc in docs:
        count += 1
        if count % args.batch_size == 0:
            print "put {0}".format(count)
        queue.put(doc, True)
    print "put {0}".format(count)

    # send stop messages
    for i in range(args.processes):
        queue.put(Stop, True)

    pool.close()
    pool.join()
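
Neither worker_main nor the Stop sentinel appears in this snippet. A minimal
sketch of what they might look like, assuming the worker bulk-indexes batches
into the destination index until the sentinel class arrives (bulk() is the
elasticsearch-py helper; the rest is hypothetical):

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk


class Stop(object):
    """Sentinel class put on the queue once per worker to signal shutdown."""


def worker_main(queue, source_index, destination_index, batch_size):
    client = Elasticsearch()  # one client per worker process
    batch = []
    while True:
        doc = queue.get(True)
        if doc is Stop:
            break
        batch.append({
            "_index": destination_index,
            "_id": doc["_id"],
            "_source": doc["_source"],
        })
        if len(batch) >= batch_size:
            bulk(client, batch)
            batch = []
    if batch:  # flush the remainder
        bulk(client, batch)
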
Example No. 2
def main():
    arg = parse_args()
    folder = arg.fold
    core = arg.core
    output = arg.out
    start = arg.start
    if start:
        start = start.replace('-', '') + '000000'

    task_queue = Queue()
    result_queue = Queue()

    task_count = create_task(folder, task_queue, start)
    print task_count
    for i in range(core):
        Process(target=worker, args=(task_queue, result_queue)).start()

    #send stop signal
    for i in range(core):
        task_queue.put('STOP')

    #print result
    out_files = {}
    for i in range(task_count):
        actions = result_queue.get()
        user = actions["user"]
        for day in actions["actions"]:
            if day not in out_files:
                out_files[day] = open(os.path.join(output, day), "w")
            out_files[day].write(json.dumps({"user": user, "actions": actions["actions"][day]}) + "\n")
    for day in out_files:
        out_files[day].flush()
        out_files[day].close()
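
create_task and worker are defined elsewhere in that project; a sketch of a
compatible worker, with process_task standing in for the real per-file
aggregation (the collector above expects one {"user": ..., "actions": {day:
[...]}} dict per task):

def process_task(task):
    # stand-in for the real work; shape matches the collector loop above
    return {"user": task, "actions": {}}


def worker(task_queue, result_queue):
    # drain tasks until the 'STOP' sentinel, emitting one result per task
    for task in iter(task_queue.get, 'STOP'):
        result_queue.put(process_task(task))
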
Example No. 3
def main(world_folder, replacement_file_name):
    global replacements
    world = nbt.world.WorldFolder(world_folder)
    logger = configure_logging()
    logger.info("Starting processing of %s", world_folder)
    if not isinstance(world, nbt.world.AnvilWorldFolder):
        logger.error("%s is not an Anvil world" % (world_folder))
        return 65 # EX_DATAERR
    if replacement_file_name is not None:
        logger.info("Using Replacements file: %s", replacement_file_name)
        with open(replacement_file_name, 'r') as replacement_file:
            replacements = json.load(replacement_file)
    # get list of region files, going to pass this into function to process region
    region_files = world.get_regionfiles()
    
    # Parallel
    q = Queue()
    lp = threading.Thread(target=logger_thread, args=[q])
    lp.start()
    p = Pool(initializer=process_init, initargs=[q,replacements], maxtasksperchild=1)
    region_data = p.map(process_region, region_files)
    # Map has finished up, let's close the logging queue
    q.put(None)
    lp.join()
    
    # Not Parallel
#     region_data = map(process_region, region_files)
    
    # Write output data
    write_block_data(region_data,"output.txt")
    return 0
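
logger_thread is not shown either; the usual pattern (as in the stdlib logging
cookbook) forwards queued LogRecords until a None sentinel, which is exactly
what the q.put(None) above relies on. A sketch under that assumption:

import logging

def logger_thread(q):
    # forward LogRecords produced by the pool workers until main()
    # signals shutdown by putting None on the queue
    while True:
        record = q.get()
        if record is None:
            break
        logging.getLogger(record.name).handle(record)
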
Example No. 4
def recoverPRNGState(cookie,timeMillisEstimate,PRNGMillisEstimate,IPAddr,serverPort,numWorkers,chunkSize):
    global PRNGMillisDelta
    global initalSeek
    
    q = Queue(0)
    i = 0
    
    if PRNGMillisDelta%chunkSize > 0:
        q.put((PRNGMillisEstimate+PRNGMillisDelta-PRNGMillisDelta%chunkSize,PRNGMillisEstimate+PRNGMillisDelta,initalSeek))
    
    for i in range(PRNGMillisEstimate,PRNGMillisEstimate+PRNGMillisDelta-PRNGMillisDelta%chunkSize,chunkSize):
        q.put((i,i+chunkSize,initalSeek))
        
    desc = []

    seedValue = Value('d', 0)

    # Start worker processes and assign work.                     
    for i in range(numWorkers):
        p = Process(target=recoverPRNGStateWorker, args=(cookie,timeMillisEstimate,q,IPAddr,serverPort,seedValue))
        p.start()
        desc.append(p)
        
    # Wait for worker processes to finish.
    for p in desc:
        p.join()

    return long(seedValue.value)
Example No. 5
class Manager(Process):
    def __init__(self, wnum=3):
        Process.__init__(self)
        self.s2m = Queue()  # message Manager receive from worker and svr
        self.m2w = Queue()  # message send to works
        self.works = [0] * wnum
        for i in range(wnum):
            self.works[i] = Worker(self.s2m, self.m2w)
            self.works[i].start()

    def stop(self):
        for w in self.works:
            self.m2w.put(None)  # FIXME should call worker.Terminal?

    """
Video Site: bilibili.com
Title:      【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】
Type:       Flash video (video/x-flv)
Size:       3410.85 MiB (3576536465 Bytes)

Downloading 【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】.flv ...
  0.7% ( 22.2/3410.9MB) [#
    """

    def run(self):
        # reset DB flags
        kuos = get_by_flag(WORK)
        for uo in kuos:
            set_flag(uo.mid, STOP)
        tuos = get_by_flag(WAIT)
        for uo in tuos:
            set_flag(uo.mid, STOP)

        while True:
            msg = self.s2m.get()
            # print("pid=%s, self.s2m.get=%s" % (os.getpid(), repr(msg)))
            who = msg.get("who")
            if who == "worker":
                self.handle_mid(msg["mid"], msg["dat"])
            elif who == "svr":
                # self.m2w.put(msg['mid'])
                self.m2w.put(pick_url(msg["mid"]))
            elif who == "error":
                sys.stderr.write(msg["dat"])  # FIXME
                sys.stderr.write("\n")
            else:
                sys.stderr.write("Unknow msg:\n")
                sys.stderr.write(msg)
                sys.stderr.write("\n")

    def handle_mid(self, mid, dat):
        print(dat)
        if dat.startswith("Process "):
            dd = dat.split()
            act = dd[2].lower()
            print("mid=%s, act=%s" % (mid, act))
            set_flag(mid, act)
        elif dat.startswith("Downloading "):
            print("mid=[%s]" % mid)
            update_filename(mid, dat[12:-5])
Example No. 6
def main():
    """Runs everything"""
    
    #clients
    hosts = ["localhost", "localhost"]
    NUMBER_OF_PROCESSES = len(hosts)
    
    # Create queues
    task_queue = Queue()
    done_queue = Queue()
    
    #submit tasks
    for host in hosts:
        task_queue.put(host)

    #Start worker processes
    for i in range(NUMBER_OF_PROCESSES):
        Process(target=worker, args=(task_queue, done_queue)).start()
    
    # Get and print results
    print 'Unordered results:'
    for i in range(len(hosts)):
        print '\t', done_queue.get().query    
    
    # Tell child processes to stop
    for i in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')
        print "Stopping Process #%s" % i
Example No. 7
 def run(self):
     '''run multiple replicates'''
     if self.data['verbosity'] <= 1:
         iterations = range(self.data['replicates'])
     else:
         widgets = ['{0} : '.format(self.data['name']), Percentage(),
                    ' ', Bar('='), ' ', ETA()]
         pbar = ProgressBar(widgets=widgets, maxval=self.data['replicates'],
                            term_width=get_terminal_size()[0] - 5)
         iterations = pbar((i for i in range(self.data['replicates'])))
     nJobs = max(min(self.data['jobs'], self.data['replicates']), 1)
     workQueue = Queue()
     resQueue = Queue()
     # put all replicates + stop signals in queue
     for replicate in range(self.data['replicates']):
         workQueue.put(replicate)
     for i in range(nJobs):
         workQueue.put(None)
     # spawn workers
     procs = [Process(target = self.calculate,
                      args = (workQueue, resQueue)) for j in range(nJobs)]
     for p in procs:
         p.start()
     # collect the results off the queue
     for i in iterations:
         try:
             self.__save(resQueue.get())
         except KeyboardInterrupt as e:
             raise ValueError("calculator terminated!")
     for p in procs:
         p.join()
     if self.failure_count.value():
         env.logger.info("{} invalid replicate(s)".format(self.failure_count.value()))
         self.data['replicates'] = self.data['replicates'] - self.failure_count.value()
     return {} if len(self.result) == 0 else dict(list(self.data.items()) + list(self.result.items()))
Example No. 8
class Updater(Process):

    def __init__(self, maxsize=15):
        Process.__init__(self)
        #self.queue = Queue(maxsize)
        self.queue = Queue()
        self.queue_lock = Lock()
        self._exit = Event()

    def run(self):
        while not self._exit.is_set():
            #with self.queue_lock:
            self.queue.put(self.receive())
            #self.queue.put_nowait(self.receive())
            #if self.queue.full():
            #    try:
            #        self.queue.get_nowait()
            #    except:
            #        pass

    def stop(self):
        self._exit.set()
        # This leaves the process hanging on Windows
        #self.join(STOP_TIMEOUT)
        if self.is_alive():
            #TODO make a nicer warning
            print 'Terminating updater:', self
            self.terminate()

    def receive(self):
        raise NotImplementedError
Example No. 9
    def test_report_hash_added_after_send(self, fromConfig, fromOptions, getLogger):
        # Side effect for fromConfig
        def fake_virts(logger, config):
            new_fake_virt = Mock()
            new_fake_virt.config.name = config.name
            return new_fake_virt

        fromConfig.side_effect = fake_virts
        options = Mock()
        options.interval = 0
        options.oneshot = True
        options.print_ = False
        options.log_file = ''
        options.log_dir = ''
        virtwho = VirtWho(self.logger, options, config_dir="/nonexistant")

        def send(report):
            report.state = AbstractVirtReport.STATE_FINISHED
            return True
        virtwho.send = Mock(side_effect=send)
        queue = Queue()
        virtwho.queue = queue
        virtwho.retry_after = 1
        virtwho.configManager.addConfig(self.config)
        virtwho.configManager.addConfig(self.second_config)
        queue.put(self.fake_report)
        queue.put(self.fake_domain_list)
        virtwho.run()

        self.assertEqual(virtwho.send.call_count, 2)
        self.assertEqual(virtwho.last_reports_hash[self.config.name], self.fake_report.hash)
        self.assertEqual(virtwho.last_reports_hash[self.second_config.name], self.fake_domain_list.hash)
Example No. 10
class UpDown:

    def __init__(self, down_workers=2, up_workers=2, db=None):
        self.down_workers_num = down_workers
        self.up_workers_num = up_workers
        self.db = db
        self.base_url = "http://eol.jsc.nasa.gov/SearchPhotos/"
        self.down_workers = []
        self.up_workers = []
        self.to_upload = []
        self.q = Queue()

    def down_worker(self, download_url, image_id):
        """
        Download images and set the database after the download was complete.
        """
        down = ImageDownload(self.base_url + download_url)
        down.find_urls()
        if down.dl():
            self.db.update_image_downloaded(image_id, down.file_name)

    def up_worker(self, mission_id):
        """
        Check for images that are downloaded but not uploaded every minute.
        """
        while True:
            self.to_upload = self.db.get_to_upload(mission_id)
            if len(list(self.to_upload)) > 0:
                print "Found a file to upload!\n"
                self.to_upload = list(self.db.get_to_upload(mission_id))
                self.q.put(self.to_upload)
            else:
                print "No files to upload found!\n"
            time.sleep(60)
Example No. 11
def ParCalculate(systems,calc,cleanup=True,block=True,prefix="Calc_"):
    '''
    Run calculators in parallel for all systems. 
    Calculators are executed in isolated processes and directories.
    The resulting objects are returned in the list (one per input system).
    '''

    if not isinstance(systems, list):
        sysl = [systems]
    else:
        sysl = systems

    if block :
        iq=Queue(len(sysl)+1)
        oq=Queue(len(sysl)+1)
            
        # Create workers    
        for s in sysl:
            __PCalcProc(iq, oq, calc, prefix=prefix, cleanup=cleanup).start()

        # Put jobs into the queue
        for n,s in enumerate(sysl):
            iq.put([n,s])
            # Protection against too quick insertion
            time.sleep(0.2)
        
        if verbose : 
            print("Workers started:", len(sysl))
        
        # Collect the results
        res=[]
        while len(res)<len(sysl) :
            n,s=oq.get()
            res.append([n,s])
            #print("Got from oq:", n, s.get_volume(), s.get_pressure())
    else :
        # We do not need the multiprocessing complications for non-blocking 
        # workers. We just run all in sequence.
        basedir=os.getcwd()
        res=[]
        for n,s in enumerate(sysl):
            s.set_calculator(copy.deepcopy(calc))
            s.get_calculator().block=block
            place=tempfile.mkdtemp(prefix=prefix, dir=basedir)
            os.chdir(place)
            s.get_calculator().working_dir=place
            #print("Start at :", place)
            if hasattr(calc, 'name') and calc.name=='Siesta':
                s.get_potential_energy()
            else:
                s.get_calculator().calculate(s)
            os.chdir(basedir)
            #print("Submited", s.get_calculator().calc_finished(), os.getcwd())
            # Protection against too quick insertion
            time.sleep(0.2)
            res.append([n,s])
        if verbose : 
            print("Workers started:", len(sysl))
            
    return [r for ns,s in enumerate(sysl) for nr,r in res if nr==ns]
Example No. 12
File: neo.py Project: vtphan/neo
class TaskQueue:
    N = 4
    symb = string.ascii_letters + string.digits
    
    def __init__(self):
        self.tasks = Queue()
        self.done = Queue()
        self.results = {}
        self.processes = []
        for i in range(TaskQueue.N):
            self.processes.append(Process(target=self.run_tasks))
            self.processes[-1].start()
        threading.Thread(target=self.collect_results).start()

    def add(self, f, args):
        id = ''.join(random.choice(TaskQueue.symb) for i in range(15))
        self.tasks.put((id, f,args))
        return id

    def get(self, id):
        return self.results.pop(id, '_NotFound_')
            
    def run_tasks(self):
        for id, func, args in iter(self.tasks.get, 'STOP'):
            result = func(*args)
            self.done.put((id,result))

    def collect_results(self):
        for id, r in iter(self.done.get, 'STOP'):
            self.results[id] = r
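
Usage is a simple ticket scheme: add() returns an id that get() later redeems,
or '_NotFound_' if the worker has not finished yet. A hypothetical round trip
(fork-based platforms, since run_tasks is a bound method used as a Process
target):

import operator
import time

tq = TaskQueue()
ticket = tq.add(operator.mul, (6, 7))
time.sleep(0.5)        # give a worker time to pick the task up
print(tq.get(ticket))  # 42, or '_NotFound_' if it has not finished
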
Example No. 13
    def solve(self, problems, **kwargs):
        if type(problems) not in [list, ndarray]:
            problems = [problems]
        assert issubclass(type(problems[0]), _Problem), (
            'ParalelSolver argument is not a _Problem subclass')
        qin = Queue()
        qout = Queue()
        for i, pb in enumerate(problems):
            qin.put((i, pb))

        slaves = []
        for i in range(self.n_jobs):
            slaves += [WorkerSolver(qin, qout, id_w=i,
                                    debug=self.debug,
                                    **self.param)]
            qin.put((None, None))
            slaves[-1].start()

        # Join loop
        N_iter = len(problems)
        self.solutions = [0]*N_iter
        self.scores = [0]*N_iter
        for i in range(N_iter):
            idp, z, s = qout.get()
            self.solutions[idp] = z
            self.scores[idp] = s
            log.progress(name='Solver', iteration=i+1, i_max=N_iter)

        for s in slaves:
            s.join()
        self.problems = problems
        return self.solutions
Example No. 14
class JobPool(object):

    """
    Pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=4):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_task, (self.message_queue,))
        atexit.register(self.clear)

    def add_analysis(self, analysis):
        """
        Add analysis to the pool.
        """
        analysis.set_started()
        self.message_queue.put(analysis)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()
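
execute_task, passed to Pool as its initializer, is not shown; the trick here
is that an initializer which never returns turns each pool worker into a queue
consumer. A sketch under that assumption:

def execute_task(message_queue):
    # hypothetical per-worker loop: block on the shared queue and run
    # each analysis as it arrives
    while True:
        analysis = message_queue.get()
        analysis.execute()  # assumed method on the analysis object
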
Example No. 15
class TweetManager:
      def __init__(self):
            self.sdb = boto.connect_sdb(setting.AWS_KEY, setting.AWS_SECRET)
            self.__keywords__ = get_filter_keywords(self.sdb)
            self.__cores__ = cpu_count()
            self.tweets_queue = Queue()
            self.db_tweets = self.sdb.get_domain(setting.SDB_DOMAIN)
            self.__buffer__ = ""
      
      def connect_twitter(self):
            self.conn = pycurl.Curl()
            self.conn.setopt(pycurl.POSTFIELDS,urllib.urlencode(self.__keywords__))
            self.conn.setopt(pycurl.USERPWD, "%s:%s" % (setting.TWITTER_ID, setting.TWITTER_PASSWORD))
            self.conn.setopt(pycurl.URL, setting.JSON_STREAMING_URI)
            print 'starting tweet_producer process'
            self.conn.setopt(pycurl.WRITEFUNCTION, lambda data: self.tweet_producer(data))

      
      def tweet_producer(self, tweet):
            self.__buffer__ += tweet
            if tweet.endswith("\r\n") and self.__buffer__.strip():
                  self.tweets_queue.put(self.__buffer__)
                  self.__buffer__ = ""

      def start(self):
            self.connect_twitter()
            print 'starting %d tweet_consumer process(s)' % self.__cores__
            self.consumers = [Process(target=tweet_consumer, args=(i, self.tweets_queue, self.db_tweets,))
                              for i in xrange(self.__cores__)]
            for c in self.consumers:
                  c.start()
            self.conn.perform()
Example No. 16
def ProcessStuff(spp_list):
	print 'cpu_count() = %d\n' % multiprocessing.cpu_count()
	NUMBER_OF_PROCESSES = multiprocessing.cpu_count()
	TASKS = [(CallMaxent, (spp_list[i],)) for i in range(len(spp_list))]
	#TASKS2 = [(plus, (i, 8)) for i in range(10)]

	# Create queues
	task_queue = Queue()
	done_queue = Queue()

	# Submit tasks
	for task in TASKS:
		task_queue.put(task)

	# Start worker processes
	for i in range(NUMBER_OF_PROCESSES):
		Process(target=worker, args=(task_queue, done_queue)).start()

	# Get and print results
	print 'Unordered results:'
	for i in range(len(TASKS)):
		print '\t', done_queue.get()

	# Tell child processes to stop
	for i in range(NUMBER_OF_PROCESSES):
		task_queue.put('STOP')
Example No. 17
def main():
    q = Queue()

    number_of_processes = 4
    plist = []
    
    for i in range(number_of_processes):
        plist.append(Process(target=f, args=('file_in.txt', i, q, number_of_processes)))

    for p in plist:
        p.start()
        
    for p in plist:
        p.join()
    
    q.put(None)
    
    print 'all joined!'
    # Loop through all the elements in the queue and write to file
    with open("file_out.txt", "w") as file_output:
        while True:
            item = q.get()
            print item

            if item is None:
                break
            print >>file_output, item
     
    print 'Done'
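
The target f is not shown; given the (filename, index, queue, process count)
signature, one plausible reading is that each process handles every fourth
line of the input, offset by its index. A hypothetical sketch:

def f(filename, i, q, step):
    # partition the file between workers: every step-th line, offset by i
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            if lineno % step == i:
                q.put(line.rstrip().upper())  # stand-in transformation
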
Example No. 18
def parallel_work(jobs, nr_of_threads):
    """
    Setup queues, start the processes and wait until the job is done
    """
    work_queue = Queue()
    result_queue = Queue()
    result = {}

    for job in jobs:
        work_queue.put(job)

    if nr_of_threads > len(jobs):
        nr_of_threads = len(jobs)

    for i in range(nr_of_threads):
        worker = Process(target=check_plugin, args=(work_queue,result_queue))
        worker.start()

    while len(result.keys()) < len(jobs):
        data = result_queue.get()

        if " | " in data[1]:
            (status, output) = data[1].split(" | ")
        else:
            status = "UNKNOWN"
            output = data[1]

        result[data[0]] = {"status": status, "output": output}
        #print "Host " + data[0] + " " + status

    return result
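
check_plugin is not shown, and no sentinel is ever queued, so the workers
presumably exit once the work queue runs dry; each result must be a
(host, "STATUS | output") tuple to satisfy the split above. A sketch under
those assumptions:

from Queue import Empty  # on Python 3: from queue import Empty

def check_plugin(work_queue, result_queue):
    while True:
        try:
            host = work_queue.get(timeout=1)
        except Empty:
            break  # queue stayed empty: no more jobs
        result_queue.put((host, "OK | %s responded" % host))
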
Example No. 19
    def _run_parallel(self, processes=2, progress_bar=False):
        """
        Run all matches in parallel

        Parameters
        ----------

        progress_bar : bool
            Whether or not to update the tournament progress bar
        """
        # At first sight, it might seem simpler to use the multiprocessing Pool
        # Class rather than Processes and Queues. However, Pool can only accept
        # target functions which can be pickled and instance methods cannot.
        work_queue = Queue()
        done_queue = Queue()
        workers = self._n_workers(processes=processes)

        chunks = self.match_generator.build_match_chunks()
        for chunk in chunks:
            work_queue.put(chunk)

        self._start_workers(workers, work_queue, done_queue)
        self._process_done_queue(workers, done_queue, progress_bar=progress_bar)

        return True
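
The comment above is the key design note: Pool.map pickles its target, and on
Python 2 (where this pattern was common) bound instance methods cannot be
pickled. A minimal reproduction of the failure the comment is avoiding:

import multiprocessing

class Tournament(object):
    def play(self, chunk):
        return chunk

if __name__ == '__main__':
    pool = multiprocessing.Pool(2)
    # raises PicklingError on Python 2: instance methods can't be pickled,
    # hence the Process/Queue design above (Python 3 lifted this limit)
    pool.map(Tournament().play, [1, 2])
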
Example No. 20
class MultiSegmentWriter(IndexWriter):
    def __init__(self, index, procs=2, **writerargs):
        self.index = index
        self.lock = index.storage.lock(index.indexname + "_LOCK")
        self.tasks = []
        self.postingqueue = Queue()
        #self.resultqueue = Queue()
        
        names = [index._next_segment_name() for _ in xrange(procs)]
        
        self.tasks = [SegmentWritingTask(index.storage, index.indexname,
                                         segname, writerargs, self.postingqueue)
                      for segname in names]
        for task in self.tasks:
            task.start()
        
    def add_document(self, **args):
        self.postingqueue.put(args)
        
    def cancel(self):
        for task in self.tasks:
            task.cancel()
        self.lock.release()
        
    def commit(self):
        procs = len(self.tasks)
        for _ in xrange(procs):
            self.postingqueue.put(None)
        for task in self.tasks:
            print "Joining", task
            task.join()
            self.index.segments.append(task.get_segment())
        self.index.commit()
        self.lock.release()
Example No. 21
def get_citing_papers(**args):
    # create the queues
    tasks = Queue()
    results = Queue()
    # how many threads are there to be used
    if 'threads' in args:
        threads = args['threads']
    else:
        threads = cpu_count()
    bibcodes = args.get('bibcodes',[])
    # initialize the "harvesters" (each harvester get the citations for a bibcode)
    harvesters = [ MongoCitationListHarvester(tasks, results) for i in range(threads)]
    # start the harvesters
    for b in harvesters:
        b.start()
    # put the bibcodes in the tasks queue
    num_jobs = 0
    for bib in bibcodes:
        tasks.put(bib)
        num_jobs += 1
    # add some 'None' values at the end of the tasks list, to facilitate proper closure
    for i in range(threads):
        tasks.put(None)
    # gather all results into one citation dictionary
    cit_list = []
    while num_jobs:
        data = results.get()
        cit_list += data.get('citations',[])
        num_jobs -= 1
    return cit_list
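
MongoCitationListHarvester is started like a Process and honors the None
sentinel; a hypothetical skeleton with the Mongo lookup stubbed out:

from multiprocessing import Process

def lookup_citations(bibcode):
    return []  # stand-in for the real Mongo query

class MongoCitationListHarvester(Process):
    # skeleton only: consume bibcodes until the None sentinel, emitting
    # one {'citations': [...]} dict per bibcode
    def __init__(self, tasks, results):
        Process.__init__(self)
        self.tasks = tasks
        self.results = results

    def run(self):
        for bibcode in iter(self.tasks.get, None):
            self.results.put({'citations': lookup_citations(bibcode)})
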
Example No. 22
    def test():

        queue = Queue()

        proc = Process(target=doNothing, args=(queue, ))
        proc.start()

        _logger.info("Started dummy process with PID %d", proc.pid)
        startCodeCheckerServerAttachedToPid(proc.pid)
        time.sleep(3)
        _logger.info("Allowing the dummy process to finish")
        queue.put(1)
        proc.join()

        if utils.isProcessRunning(proc.pid):
            _logger.warning("Dummy process %d was still running", proc.pid)
            proc.terminate()
            time.sleep(1)
            it.assertFalse(utils.isProcessRunning(proc.pid),
                           "Process %d is still running after terminating "
                           "it!" % proc.pid)

        time.sleep(1)
        _logger.info("Server should have died by now")

        with it.assertRaises(requests.ConnectionError):
            requests.post(it._url + '/get_diagnose_info')
Example No. 23
def start_combo(argv):
    queue = Queue(10)
    test_input = TestInputParser.get_test_input(argv)
    thread = Thread(target=combo, args=(queue, test_input))
    thread.start()
    time.sleep(24 * 60 * 60)
    queue.put("stop")
Example No. 24
class YaraJobPool(object):

    """
    Yara pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=3):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_yara_task,
                         (self.message_queue,))
        atexit.register(self.clear)

    def add_yara_task(self, yara_task):
        """
        Adds the yara task.
        """
        self.message_queue.put(yara_task)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()
Example No. 25
def start_load(argv):
    queue = Queue(10)
    test_input = TestInputParser.get_test_input(argv)
    load_info = {
        'server_info': [test_input.servers[0]],
        'memcached_info': {
            'bucket_name': "default",
            'bucket_port': "11210",
            'bucket_password': "",
            },
        'operation_info': {
            'operation_distribution': {'set': 10},
            'valuesize_distribution': {20: 30, 30: 5, 25: 5},
            'create_percent': 25,
            'threads': 6,
            },
        'limit_info': {
            'max_items': 0,
            'operation_count': 0,
            'time': time.time() + 24 * 60 * 60,
            'max_size': 0,
            },
        }
    thread = Thread(target=loadrunner, args=(queue, test_input.servers, load_info))
    thread.start()
    time.sleep(24 * 60 * 60)
    queue.put("stop")
Example No. 26
def getFeatureMultiprocessing(subProcFunc, blwFile, outputFile, funcArgs, keyword=['Vietnamese_by_catalog', 'ppVietnamese_by_catalog']):
    START_TIME = time.time()
    # getFreqWordsForFileFromDict(['data/ppVietnamese_by_catalog/Easy/ct24/ct24 (100).txt',12.35,3, 4], 'data/TanSoTu.txt')
    # getDataNFeatureFromFile('test_data.txt', 'output/test_Vietnamese_output_classifier.csv', 'test')
    # X3 = getDataNFeatureFromFile('Difficult_data.txt', 'output/vietnamesewn_Difficult_output.csv', 3)
    # X1 = getDataNFeatureFromFile('Easy_data.txt','output/vietnamesewn_Easy_output.csv', 1)
    # X2 = getDataNFeatureFromFile('Normal_data.txt','output/vietnamesewn_Normal_output.csv', 2)
    _tempfile = open(blwFile, 'r')
    temp = _tempfile.read().splitlines()
    _tempfile.close()
    filesQueue = Queue()
    RESULT_QUEUE = Queue()
    for i in range(1, len(temp)):
            temp[i] = temp[i].split(',')
            temp[i][0] = re.sub(keyword[0], keyword[1], temp[i][0])
            if not keyword[0] == '' and (not temp[i][0].find(keyword[-1]) > 0):
                print('[ERROR] processing ', temp[i][0])
                print('sub', keyword[0], keyword[-1], re.sub(keyword[0], keyword[-1], temp[i][0]))
                return
            filesQueue.put(temp[i])
    PROCESS_LOCK = Lock()
    myProcess = []
    for processID in range(MAX_PROCESS):
        myProcess.append(Process(target=getDataNFeatureFromFileForAProc, args=(PROCESS_LOCK, RESULT_QUEUE, filesQueue, subProcFunc, funcArgs)))
    myProcess.append(Process(target=writeOutResult, args=(RESULT_QUEUE, outputFile)))

    for _process in myProcess:
        _process.start()
    for _process in myProcess:
        _process.join()
    print('total runtime:', time.time() - START_TIME)
Example No. 27
def start_backup(argv):
    queue = Queue(10)
    test_input = TestInputParser.get_test_input(argv)
    thread = Thread(target=backup, args=(queue, test_input.servers))
    thread.start()
    time.sleep(24 * 60 * 60)
    queue.put("stop")
Example No. 28
File: __init__.py Project: yk/fuel
class BackgroundProcess(object):
    """A background process that reads batches and stores them in a queue.

    The :meth:`main` method needs to be called in order to start reading
    batches into the queue. Note that this process will run infinitely;
    start it as a :attr:`~multiprocessing.Process.daemon` to make sure it
    will get killed when the main process exits.

    Parameters
    ----------
    data_stream : :class:`.DataStream` or :class:`Transformer`
        The data stream from which to read batches.
    max_batches : int
        The maximum number of batches to store in the queue. If reached,
        the process will block until a batch is popped from the queue.

    """
    def __init__(self, data_stream, max_batches):
        self.data_stream = data_stream
        self.batches = Queue(max_batches)
        self.run_background = True

    def main(self):
        while True:
            iterator = self.data_stream.get_epoch_iterator()
            for batch in iterator:
                self.batches.put(batch)
            self.batches.put(StopIteration)

    def get_next_data(self):
        return self.batches.get()
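
Since main() loops forever, the intended wiring (per the docstring) is a
daemon process feeding a consumer through the queue. A hypothetical example
with a stand-in data stream:

from multiprocessing import Process

class FakeStream(object):
    # stand-in for a fuel DataStream, just to make the wiring concrete
    def get_epoch_iterator(self):
        return iter([[1, 2], [3, 4]])

bg = BackgroundProcess(FakeStream(), max_batches=10)
p = Process(target=bg.main)
p.daemon = True  # the infinite read loop dies with the parent
p.start()

for batch in iter(bg.get_next_data, StopIteration):
    print(batch)  # one epoch's batches: [1, 2] then [3, 4]
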
Example No. 29
def processFiles(patch_dir):
    root = os.getcwd()
    glbl.data_dirs = {}
    if root != patch_dir: working_path = root+"/"+patch_dir
    else: working_path = root

    for path, dirs, files in os.walk(working_path):
        if len(dirs) == 0: glbl.data_dirs[path] = ''
    

    # Multiprocessing Section
    #########################################
    Qids = glbl.data_dirs.keys()
    manager = Manager()                                      # creates shared memory manager object
    results = manager.dict()                                 # Add dictionary to manager, so it can be accessed across processes
    nextid = Queue()                                         # Create Queue object to serve as shared id generator across processes
    for qid in Qids: nextid.put(qid)                         # Load the ids to be tested into the Queue
    for x in range(0,multiprocessing.cpu_count()):           # Create one process per logical CPU
        p = Process(target=processData, args=(nextid,results)) # Assign process to processCBR function, passing in the Queue and shared dictionary
        glbl.jobs.append(p)                                   # Add the process to a list of running processes
        p.start()                                             # Start process running
    for j in glbl.jobs:
        j.join()                                              # For each process, join them back to main, blocking on each one until finished
    
    # write out results
    c = 1
    sets = results.keys()
    sets.sort()
    for x in sets:
        if results[x] != 'None':
            FINAL = open('result'+str(c)+'.txt','w')
            n = "\n************************************************************************************************\n"
            FINAL.write(n+"* "+x+'    *\n'+n+results[x]+"\n")
            FINAL.close()     
            c += 1
Example No. 30
    def test_same_report_filtering(self, fromConfig, fromOptions, getLogger):
        def fake_virts(logger, config):
            new_fake_virt = Mock()
            new_fake_virt.config.name = config.name
            return new_fake_virt

        fromConfig.side_effect = fake_virts
        options = Mock()
        options.interval = 0
        options.oneshot = True
        options.print_ = False
        options.log_dir = ''
        options.log_file = ''
        virtwho = VirtWho(self.logger, options, config_dir="/nonexistant")

        queue = Queue()
        # Create another report with same hash
        report2 = HostGuestAssociationReport(self.config, self.fake_report.association)
        self.assertEqual(self.fake_report.hash, report2.hash)

        def send(report):
            report.state = AbstractVirtReport.STATE_FINISHED
            # Put second report when the first is done
            queue.put(report2)
            return True
        virtwho.send = Mock(side_effect=send)
        virtwho.queue = queue
        virtwho.retry_after = 1
        virtwho.configManager.addConfig(self.config)
        queue.put(self.fake_report)
        virtwho.run()

        self.assertEqual(virtwho.send.call_count, 1)
Example No. 31
            '.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
            re.S)
        ret = com.finditer(s)
        for i in ret:
            print({
                "id": i.group("id"),
                "title": i.group("title"),
                "rating_num": i.group("rating_num"),
                "comment_num": i.group("comment_num"),
            })


if __name__ == '__main__':
    count = 0
    q = Queue()
    p_l = []
    for i in range(10):
        count += 25
        p = Process(
            target=producer,
            args=(q,
                  'https://movie.douban.com/top250?start=%s&filter=' % count))
        p.start()
        p_l.append(p)
    for i in range(5):
        c = Process(target=consumer, args=(q, ))
        c.start()
    for i in p_l:
        i.join()
    for i in range(5):
        q.put(None)  # one sentinel per consumer process
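
Both producer and consumer are missing here, and the consumer's opening lines
(its def line and the start of the re.compile call) were evidently cut off
above. A reconstruction of the pair, hedged as a sketch since only the
parsing tail survives:

import re
import requests

def producer(q, url):
    # fetch one listing page and enqueue its HTML
    q.put(requests.get(url).text)

def consumer(q):
    # parse pages until the None sentinel; the group names match the
    # fragment shown above
    com = re.compile(
        r'<em class="">(?P<id>\d+)</em>.*?<span class="title">(?P<title>.*?)'
        r'</span>.*?<span class="rating_num".*?>(?P<rating_num>.*?)</span>'
        r'.*?<span>(?P<comment_num>.*?)评价</span>', re.S)
    for page in iter(q.get, None):
        for m in com.finditer(page):
            print({"id": m.group("id"), "title": m.group("title")})
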
Example No. 32
class EpollServer(TServer):
    def __init__(self, *args, **kwargs):
        super(EpollServer, self).__init__(*args)
        self._clients = {}
        self._worker_processes = {}

        event_queue_size = kwargs.get('event_queue_size', 100)
        self._worker_process_number = kwargs.get('worker_process_number', 1)

        self._tasks = Queue(event_queue_size)
        """
        创建socket两端,thrift服务端处理完若干thrift客户端请求后,此socket write端将这些thrift客户端对应的文件描述符以
        分隔符','连接起来发送给read端,read端读取到这些文件描述符之后,依次将每个请求的响应发送给thrift客户端
        """
        self._read_side, self._write_side = socket.socketpair()
        self._stop_flag = Value('b', False)
        self._stop_read_flag = Value('b', False)
        self._epoll = select.epoll()
        # EPOLLIN sets the read-event bit
        self._epoll.register(self._read_side, select.EPOLLIN)

        self._harakiri = kwargs.get('harakiri', 5)

        manager = Manager()
        self._responses = manager.dict()
        self._connection_limiter = ConnectionLimiter(self._get_queue_size,
                                                     event_queue_size)

    def _get_queue_size(self):
        try:
            return self._tasks.qsize()
        except NotImplementedError:
            return 0

    def _register_harakiri(self):
        signal.signal(signal.SIGALRM, self._do_harakiri)

    def _do_harakiri(self, signum, frame):
        raise Exception('Execution killed after %s seconds' % self._harakiri)

    @contextmanager
    def _harakiri_execute(self):
        signal.alarm(self._harakiri)
        try:
            yield
        finally:
            signal.alarm(0)

    def set_worker_process_number(self, num):
        self._worker_process_number = num

    def stop(self):
        self._stop_read_flag.value = True
        # buffering: queue one sentinel pair per worker process
        for put_count in range(self._worker_process_number):
            self._tasks.put([None, None])
        # close the queue; need to read up on how close/join_thread behave
        self._tasks.close()
        self._tasks.join_thread()
        self._stop_flag.value = True
        # effectively closes the underlying socket
        self.serverTransport.close()

    def serve(self):
        self.serverTransport.listen()
        self.serverTransport.handle.setblocking(0)
        # register the file descriptor watched for the thrift transport
        self._epoll.register(self.serverTransport.handle.fileno(),
                             select.EPOLLIN)

        self._stop_flag.value = False
        self._stop_read_flag.value = False
        # fork the worker processes
        for proc_no in range(self._worker_process_number):
            self._fork_worker_process(proc_no)
        # re-fork when a child process dies abnormally
        signal.signal(signal.SIGCHLD, self._refork_worker_process)

        while not self._stop_flag.value:
            try:
                self.handle()
            except (SystemExit, KeyboardInterrupt):
                break

    def _fork_worker_process(self, proc_no=0):
        process = Process(target=self._start_worker_process, args=(proc_no, ))
        process.start()
        self._worker_processes[proc_no] = process

    def _refork_worker_process(self, signum, frame):
        if not self._stop_flag.value:
            for proc_no, worker_process in self._worker_processes.iteritems():
                if not worker_process.is_alive():
                    self._fork_worker_process(proc_no)

    def _start_worker_process(self, proc_no):
        CommonUtil.set_proctitle('sub_process_%s' % proc_no)
        self._register_harakiri()
        # signal handler for process termination
        signal.signal(signal.SIGTERM, self._terminate_handler)
        while True:
            fileno = None
            try:
                # pull one client's payload and file descriptor off the
                # thrift client request queue
                message, fileno = self._tasks.get()
                if message is None:
                    break
                itransport = TTransport.TMemoryBuffer(message)
                otransport = TTransport.TMemoryBuffer()
                iprot = self.inputProtocolFactory.getProtocol(itransport)
                oprot = self.outputProtocolFactory.getProtocol(otransport)
                with self._harakiri_execute():
                    # the thrift server processes the request
                    self.processor.process(iprot, oprot)
                # save the response data for the client connection keyed by its fd
                self._responses[fileno] = (True, otransport.getvalue())
                # write the file descriptor, followed by ',', to the write side
                self._write_side.sendall(str(fileno) + ',')
            except Exception:
                if fileno:
                    self._responses[fileno] = (False, b'')
                    self._write_side.sendall(str(fileno) + ',')

    def _terminate_handler(self, signum, frame):
        raise SystemExit()

    def handle(self):
        try:
            events = self._epoll.poll(1)
        except Exception as e:
            # interrupted slow system call (EINTR)
            if CommonUtil.get_exception_errno(e) == errno.EINTR:
                events = []
            else:
                raise

        for fileno, event in events:
            if fileno == self.serverTransport.handle.fileno(
            ) and not self._stop_read_flag.value:
                # got a thrift client connection
                client = self.serverTransport.accept().handle
                self._clients[client.fileno()] = EpollConnection(
                    client, self._epoll)
                # register the client's file descriptor with epoll
                self.register_epollin(client.fileno())
            elif event & select.EPOLLIN:
                if fileno == self._read_side.fileno():
                    msg = self._read_side.recv(1024)
                    for client_fileno in msg.split(',')[:-1]:
                        if client_fileno == '' or client_fileno is None:
                            continue
                        client_fileno = int(client_fileno)
                        connection = self._clients.get(client_fileno)
                        response = self._responses.get(client_fileno)
                        if connection and response:
                            connection.ready(*response)
                elif not self._stop_read_flag.value:
                    connection = self._clients.get(fileno)
                    if connection:
                        connection.read()
                        if connection.get_status(
                        ) == ConnectionStatus.WAIT_PROCESS:
                            try:
                                if self._connection_limiter.try_acquire():
                                    self._tasks.put_nowait([
                                        connection.get_msg(),
                                        connection.get_fileno()
                                    ])
                                else:
                                    connection.reset()
                                    del self._clients[fileno]
                            except _Queue.Full:
                                connection.reset()
                                del self._clients[fileno]
                else:
                    connection = self._clients[fileno]
                    connection.reset()
                    del self._clients[fileno]
            elif event & select.EPOLLOUT:
                connection = self._clients.get(fileno)
                if connection:
                    connection.write()
            elif event & select.EPOLLHUP:
                connection = self._clients.get(fileno)
                if connection:
                    connection.close()
                    del self._clients[fileno]

    def register_epollin(self, fileno):
        self._epoll.register(fileno, select.EPOLLIN)

    def register_epollout(self, fileno):
        self._epoll.register(fileno, select.EPOLLOUT)
Example No. 33
class RestoreVMsWindow(Ui_Restore, QWizard):

    __pyqtSignals__ = ("restore_progress(int)","backup_progress(int)")

    def __init__(self, app, qvm_collection, blk_manager, parent=None):
        super(RestoreVMsWindow, self).__init__(parent)

        self.app = app
        self.qvm_collection = qvm_collection
        self.blk_manager = blk_manager

        self.restore_options = None
        self.vms_to_restore = None
        self.func_output = []
        self.feedback_queue = Queue()
        self.canceled = False
        self.tmpdir_to_remove = None
        self.error_detected = Event()

        self.excluded = {}

        self.vm = self.qvm_collection[0]

        assert self.vm is not None

        self.setupUi(self)

        self.select_vms_widget = MultiSelectWidget(self)
        self.select_vms_layout.insertWidget(1, self.select_vms_widget)

        self.connect(self, SIGNAL("currentIdChanged(int)"), self.current_page_changed)
        self.connect(self, SIGNAL("restore_progress(QString)"), self.commit_text_edit.append)
        self.connect(self, SIGNAL("backup_progress(int)"), self.progress_bar.setValue)
        self.dir_line_edit.connect(self.dir_line_edit, SIGNAL("textChanged(QString)"), self.backup_location_changed)
        self.connect(self.verify_only, SIGNAL("stateChanged(int)"),
                     self.on_verify_only_toogled)

        self.select_dir_page.isComplete = self.has_selected_dir
        self.select_vms_page.isComplete = self.has_selected_vms
        self.confirm_page.isComplete = self.all_vms_good
        #FIXME
        #this causes to run isComplete() twice, I don't know why
        self.select_vms_page.connect(self.select_vms_widget, SIGNAL("selected_changed()"), SIGNAL("completeChanged()"))

        fill_appvms_list(self)
        self.__init_restore_options__()

    @pyqtSlot(name='on_select_path_button_clicked')
    def select_path_button_clicked(self):
        select_path_button_clicked(self, True)

    def on_ignore_missing_toggled(self, checked):
        self.restore_options['use-default-template'] = checked
        self.restore_options['use-default-netvm'] = checked

    def on_ignore_uname_mismatch_toggled(self, checked):
        self.restore_options['ignore-username-mismatch'] = checked

    def on_verify_only_toogled(self, checked):
        self.restore_options['verify-only'] = bool(checked)

    def cleanupPage(self, p_int):
        if self.page(p_int) is self.select_vms_page:
            self.vms_to_restore = None
        else:
            super(RestoreVMsWindow, self).cleanupPage(p_int)

    def __fill_vms_list__(self):
        if self.vms_to_restore is not None:
            return

        self.select_vms_widget.selected_list.clear()
        self.select_vms_widget.available_list.clear()

        self.target_appvm = None
        if self.appvm_combobox.currentIndex() != 0:   #An existing appvm chosen
            self.target_appvm = self.qvm_collection.get_vm_by_name(
                    str(self.appvm_combobox.currentText()))

        try:
            self.vms_to_restore = backup.backup_restore_prepare(
                    unicode(self.dir_line_edit.text()),
                    unicode(self.passphrase_line_edit.text()),
                    options=self.restore_options,
                    host_collection=self.qvm_collection,
                    encrypted=self.encryption_checkbox.isChecked(),
                    appvm=self.target_appvm)

            for vmname in self.vms_to_restore:
                if vmname.startswith('$'):
                    # Internal info
                    continue
                self.select_vms_widget.available_list.addItem(vmname)
        except QubesException as ex:
            QMessageBox.warning (None, "Restore error!", str(ex))

    def __init_restore_options__(self):
        if not self.restore_options:
            self.restore_options = {}
            backup.backup_restore_set_defaults(self.restore_options)

        if 'use-default-template' in self.restore_options and 'use-default-netvm' in self.restore_options:
            val = self.restore_options['use-default-template'] and self.restore_options['use-default-netvm']
            self.ignore_missing.setChecked(val)
        else:
            self.ignore_missing.setChecked(False)

        if 'ignore-username-mismatch' in self.restore_options:
            self.ignore_uname_mismatch.setChecked(self.restore_options['ignore-username-mismatch'])

    def gather_output(self, s):
        self.func_output.append(s)

    def restore_error_output(self, s):
        self.error_detected.set()
        self.feedback_queue.put((SIGNAL("restore_progress(QString)"),
                                 u'<font color="red">{0}</font>'.format(s)))

    def restore_output(self, s):
        self.feedback_queue.put((SIGNAL("restore_progress(QString)"),
                                 u'<font color="black">{0}</font>'.format(s)))

    def update_progress_bar(self, value):
        self.feedback_queue.put((SIGNAL("backup_progress(int)"), value))

    def __do_restore__(self, thread_monitor):
        err_msg = []
        self.qvm_collection.lock_db_for_writing()
        try:
            backup.backup_restore_do(self.vms_to_restore,
                                     self.qvm_collection,
                                     print_callback=self.restore_output,
                                     error_callback=self.restore_error_output,
                                     progress_callback=self.update_progress_bar)
        except backup.BackupCanceledError as ex:
            self.canceled = True
            self.tmpdir_to_remove = ex.tmpdir
            err_msg.append(unicode(ex))
        except Exception as ex:
            print "Exception:", ex
            err_msg.append(unicode(ex))
            err_msg.append("Partially restored files left in "
                           "/var/tmp/restore_*, investigate them and/or clean them up")

        self.qvm_collection.unlock_db()
        if self.canceled:
            self.emit(SIGNAL("restore_progress(QString)"),
                      '<b><font color="red">{0}</font></b>'
                      .format("Restore aborted!"))
        elif len(err_msg) > 0 or self.error_detected.is_set():
            if len(err_msg) > 0:
                thread_monitor.set_error_msg('\n'.join(err_msg))
            self.emit(SIGNAL("restore_progress(QString)"),
                      '<b><font color="red">{0}</font></b>'
                      .format("Finished with errors!"))
        else:
            self.emit(SIGNAL("restore_progress(QString)"),
                      '<font color="green">{0}</font>'
                      .format("Finished successfully!"))

        thread_monitor.set_finished()

    def current_page_changed(self, id):

        old_sigchld_handler = signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        if self.currentPage() is self.select_vms_page:
            self.__fill_vms_list__()

        elif self.currentPage() is self.confirm_page:
            for v in self.excluded:
                self.vms_to_restore[v] = self.excluded[v]
            self.excluded = {}
            for i in range(self.select_vms_widget.available_list.count()):
                vmname =  self.select_vms_widget.available_list.item(i).text()
                self.excluded[str(vmname)] = self.vms_to_restore[str(vmname)]
                del self.vms_to_restore[str(vmname)]

            del self.func_output[:]
            self.vms_to_restore = backup.restore_info_verify(self.vms_to_restore,
                                                             self.qvm_collection)
            backup.backup_restore_print_summary(
                    self.vms_to_restore, print_callback = self.gather_output)
            self.confirm_text_edit.setReadOnly(True)
            self.confirm_text_edit.setFontFamily("Monospace")
            self.confirm_text_edit.setText("\n".join(self.func_output))

            self.confirm_page.emit(SIGNAL("completeChanged()"))

        elif self.currentPage() is self.commit_page:
            self.button(self.FinishButton).setDisabled(True)
            self.showFileDialog.setEnabled(True)
            self.showFileDialog.setChecked(self.showFileDialog.isEnabled()
                                           and str(self.dir_line_edit.text())
                                           .count("media/") > 0)

            self.thread_monitor = ThreadMonitor()
            thread = threading.Thread (target= self.__do_restore__ , args=(self.thread_monitor,))
            thread.daemon = True
            thread.start()

            while not self.thread_monitor.is_finished():
                self.app.processEvents()
                time.sleep (0.1)
                try:
                    for (signal_to_emit,data) in iter(self.feedback_queue.get_nowait,None):
                        self.emit(signal_to_emit,data)
                except Empty:
                    pass

            if not self.thread_monitor.success:
                if self.canceled:
                    if self.tmpdir_to_remove and \
                        QMessageBox.warning(None, "Restore aborted",
                                            "Do you want to remove temporary "
                                            "files from %s?" % self
                                                    .tmpdir_to_remove,
                                            QMessageBox.Yes, QMessageBox.No) == \
                            QMessageBox.Yes:
                        shutil.rmtree(self.tmpdir_to_remove)
                else:
                    QMessageBox.warning (None, "Backup error!", "ERROR: {1}"
                                      .format(self.vm.name, self.thread_monitor.error_msg))

            if self.showFileDialog.isChecked():
                self.emit(SIGNAL("restore_progress(QString)"),
                          '<b><font color="black">{0}</font></b>'.format(
                              "Please unmount your backup volume and cancel "
                              "the file selection dialog."))
                if self.target_appvm:
                    self.target_appvm.run("QUBESRPC %s dom0" % "qubes"
                                                               ".SelectDirectory")
                else:
                    file_dialog = QFileDialog()
                    file_dialog.setReadOnly(True)
                    file_dialog.getExistingDirectory(
                        self, "Detach backup device",
                        os.path.dirname(unicode(self.dir_line_edit.text())))
            self.progress_bar.setValue(100)
            self.button(self.FinishButton).setEnabled(True)
            self.button(self.CancelButton).setEnabled(False)
            self.showFileDialog.setEnabled(False)

        signal.signal(signal.SIGCHLD, old_sigchld_handler)

    def all_vms_good(self):
        for vminfo in self.vms_to_restore.values():
            if not vminfo.has_key('vm'):
                continue
            if not vminfo['good-to-go']:
                return False
        return True

    def reject(self):
        if self.currentPage() is self.commit_page:
            if backup.backup_cancel():
                self.emit(SIGNAL("restore_progress(QString)"),
                          '<font color="red">{0}</font>'
                          .format("Aborting the operation..."))
                self.button(self.CancelButton).setDisabled(True)
        else:
            self.done(0)

    def has_selected_dir(self):
        backup_location = unicode(self.dir_line_edit.text())
        if not backup_location:
            return False
        if self.appvm_combobox.currentIndex() == 0:
            if os.path.isfile(backup_location) or \
                    os.path.isfile(os.path.join(backup_location, 'qubes.xml')):
                return True
        else:
            return True

        return False

    def has_selected_vms(self):
        return self.select_vms_widget.selected_list.count() > 0

    def backup_location_changed(self, new_dir = None):
        self.select_dir_page.emit(SIGNAL("completeChanged()"))
Example No. 34
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    if (args.stream):
        print('Reading from hls stream.')
        video_capture = HLSVideoStream(src=args.stream).start()
    else:
        print('Reading from webcam.')
        video_capture = WebcamVideoStream(src=args.video_source,
                                          width=args.width,
                                          height=args.height).start()

    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        output_rgb = cv2.cvtColor(output_q.get(), cv2.COLOR_RGB2BGR)
        cv2.imshow('Video', output_rgb)
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
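
The Pool worker is not shown; the display loop pairs each input_q.put(frame)
with one output_q.get(), so the worker presumably runs the detector frame by
frame. A sketch with the model call stubbed out:

def detect_objects(frame):
    return frame  # stand-in for the real model inference

def worker(input_q, output_q):
    # one annotated frame out for every raw frame in, keeping the
    # put/get pairing of the display loop intact
    while True:
        frame = input_q.get()
        output_q.put(detect_objects(frame))
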
Example No. 35
class SafariBooks:
    LOGIN_URL = ORLY_BASE_URL + "/member/auth/login/"
    LOGIN_ENTRY_URL = SAFARI_BASE_URL + "/login/unified/?next=/home/"

    API_TEMPLATE = SAFARI_BASE_URL + "/api/v1/book/{0}/"

    HEADERS = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "accept-encoding": "gzip, deflate",
        "accept-language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
        "cache-control": "no-cache",
        "cookie": "",
        "pragma": "no-cache",
        "origin": SAFARI_BASE_URL,
        "referer": LOGIN_ENTRY_URL,
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/60.0.3112.113 Safari/537.36"
    }

    BASE_01_HTML = "<!DOCTYPE html>\n" \
                   "<html lang=\"en\" xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"" \
                   " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" \
                   " xsi:schemaLocation=\"http://www.w3.org/2002/06/xhtml2/" \
                   " http://www.w3.org/MarkUp/SCHEMA/xhtml2.xsd\"" \
                   " xmlns:epub=\"http://www.idpf.org/2007/ops\">\n" \
                   "<head>\n" \
                   "{0}\n" \
                   "<style type=\"text/css\">" \
                   "body{{margin:1em;}}" \
                   "#sbo-rt-content *{{text-indent:0pt!important;}}#sbo-rt-content .bq{{margin-right:1em!important;}}"

    KINDLE_HTML = "body{{background-color:transparent!important;}}" \
                  "#sbo-rt-content *{{word-wrap:break-word!important;" \
                  "word-break:break-word!important;}}#sbo-rt-content table,#sbo-rt-content pre" \
                  "{{overflow-x:unset!important;overflow:unset!important;" \
                  "overflow-y:unset!important;white-space:pre-wrap!important;}}"

    BASE_02_HTML = "</style>" \
                   "</head>\n" \
                   "<body>{1}</body>\n</html>"

    CONTAINER_XML = "<?xml version=\"1.0\"?>" \
                    "<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">" \
                    "<rootfiles>" \
                    "<rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\" />" \
                    "</rootfiles>" \
                    "</container>"

    # Format: ID, Title, Authors, Description, Subjects, Publisher, Rights, Date, CoverId, MANIFEST, SPINE, CoverUrl
    CONTENT_OPF = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" \
                  "<package xmlns=\"http://www.idpf.org/2007/opf\" unique-identifier=\"bookid\" version=\"2.0\" >\n" \
                  "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" " \
                  " xmlns:opf=\"http://www.idpf.org/2007/opf\">\n"\
                  "<dc:title>{1}</dc:title>\n" \
                  "{2}\n" \
                  "<dc:description>{3}</dc:description>\n" \
                  "{4}" \
                  "<dc:publisher>{5}</dc:publisher>\n" \
                  "<dc:rights>{6}</dc:rights>\n" \
                  "<dc:language>en-US</dc:language>\n" \
                  "<dc:date>{7}</dc:date>\n" \
                  "<dc:identifier id=\"bookid\">{0}</dc:identifier>\n" \
                  "<meta name=\"cover\" content=\"{8}\"/>\n" \
                  "</metadata>\n" \
                  "<manifest>\n" \
                  "<item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\" />\n" \
                  "{9}\n" \
                  "</manifest>\n" \
                  "<spine toc=\"ncx\">\n{10}</spine>\n" \
                  "<guide><reference href=\"{11}\" title=\"Cover\" type=\"cover\" /></guide>\n" \
                  "</package>"

    # Format: ID, Depth, Title, Author, NAVMAP
    TOC_NCX = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\" ?>\n" \
              "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\"" \
              " \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n" \
              "<ncx xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\">\n" \
              "<head>\n" \
              "<meta content=\"ID:ISBN:{0}\" name=\"dtb:uid\"/>\n" \
              "<meta content=\"{1}\" name=\"dtb:depth\"/>\n" \
              "<meta content=\"0\" name=\"dtb:totalPageCount\"/>\n" \
              "<meta content=\"0\" name=\"dtb:maxPageNumber\"/>\n" \
              "</head>\n" \
              "<docTitle><text>{2}</text></docTitle>\n" \
              "<docAuthor><text>{3}</text></docAuthor>\n" \
              "<navMap>{4}</navMap>\n" \
              "</ncx>"

    def __init__(self, args):
        self.args = args
        self.display = Display("info_%s.log" % escape(args.bookid))
        self.display.intro()

        self.cookies = {}
        self.jwt = {}

        if not args.cred:
            if not os.path.isfile(COOKIES_FILE):
                self.display.exit("Login: unable to find cookies file.\n"
                                  "    Please use the --cred option to perform the login.")

            with open(COOKIES_FILE) as cookies_file:
                self.cookies = json.load(cookies_file)

        else:
            self.display.info("Logging into Safari Books Online...", state=True)
            self.do_login(*args.cred)
            if not args.no_cookies:
                with open(COOKIES_FILE, "w") as cookies_file:
                    json.dump(self.cookies, cookies_file)

        self.book_id = args.bookid
        self.api_url = self.API_TEMPLATE.format(self.book_id)

        self.display.info("Retrieving book info...")
        self.book_info = self.get_book_info()
        self.display.book_info(self.book_info)

        self.display.info("Retrieving book chapters...")
        self.book_chapters = self.get_book_chapters()

        self.chapters_queue = self.book_chapters[:]

        if len(self.book_chapters) > sys.getrecursionlimit():
            sys.setrecursionlimit(len(self.book_chapters))

        self.book_title = self.book_info["title"]
        self.base_url = self.book_info["web_url"]

        self.clean_book_title = "".join(self.escape_dirname(self.book_title).split(",")[:2]) \
                                + " ({0})".format(self.book_id)

        books_dir = os.path.join(PATH, "Books")
        if not os.path.isdir(books_dir):
            os.mkdir(books_dir)

        self.BOOK_PATH = os.path.join(books_dir, self.clean_book_title)
        self.css_path = ""
        self.images_path = ""
        self.create_dirs()
        self.display.info("Output directory:\n    %s" % self.BOOK_PATH)

        self.chapter_title = ""
        self.filename = ""
        self.css = []
        self.images = []

        self.display.info("Downloading book contents... (%s chapters)" % len(self.book_chapters), state=True)
        self.BASE_HTML = self.BASE_01_HTML + (self.KINDLE_HTML if not args.no_kindle else "") + self.BASE_02_HTML

        self.cover = False
        self.get()
        if not self.cover:
            self.cover = self.get_default_cover()
            cover_html = self.parse_html(
                html.fromstring("<div id=\"sbo-rt-content\"><img src=\"Images/{0}\"></div>".format(self.cover)), True
            )

            self.book_chapters = [{
                "filename": "default_cover.xhtml",
                "title": "Cover"
            }] + self.book_chapters

            self.filename = self.book_chapters[0]["filename"]
            self.save_page_html(cover_html)

        # startswith("win") targets Windows only; a bare substring test would also match "darwin"
        self.css_done_queue = Queue(0) if not sys.platform.startswith("win") else WinQueue()
        self.display.info("Downloading book CSSs... (%s files)" % len(self.css), state=True)
        self.collect_css()
        self.images_done_queue = Queue(0) if not sys.platform.startswith("win") else WinQueue()
        self.display.info("Downloading book images... (%s files)" % len(self.images), state=True)
        self.collect_images()

        self.display.info("Creating EPUB file...", state=True)
        self.create_epub()

        if not args.no_cookies:
            with open(COOKIES_FILE, "w") as cookies_file:
                json.dump(self.cookies, cookies_file)

        self.display.done(os.path.join(self.BOOK_PATH, self.book_id + ".epub"))
        self.display.unregister()

        if not self.display.in_error and not args.log:
            os.remove(self.display.log_file)

        sys.exit(0)

    def return_cookies(self):
        return " ".join(["{0}={1};".format(k, v) for k, v in self.cookies.items()])

    def return_headers(self, url):
        if ORLY_BASE_HOST in urlsplit(url).netloc:
            self.HEADERS["cookie"] = self.return_cookies()

        else:
            self.HEADERS["cookie"] = ""

        return self.HEADERS

    def update_cookies(self, jar):
        for cookie in jar:
            self.cookies.update({
                cookie.name: cookie.value
            })

    def requests_provider(
            self, url, post=False, data=None, perform_redirect=True, update_cookies=True, update_referer=True, **kwargs
    ):
        try:
            response = getattr(requests, "post" if post else "get")(
                url,
                headers=self.return_headers(url),
                data=data,
                allow_redirects=False,
                **kwargs
            )

            self.display.last_request = (
                url, data, kwargs, response.status_code, "\n".join(
                    ["\t{}: {}".format(*h) for h in response.headers.items()]
                ), response.text
            )

        except (requests.ConnectionError, requests.ConnectTimeout, requests.RequestException) as request_exception:
            self.display.error(str(request_exception))
            return 0

        if update_cookies:
            self.update_cookies(response.cookies)

        if update_referer:
            # TODO Update Referer HTTP Header
            # TODO How about Origin? 
            self.HEADERS["referer"] = response.request.url

        if response.is_redirect and perform_redirect:
            return self.requests_provider(response.next.url, post, None, perform_redirect, update_cookies, update_referer)
            # TODO How about **kwargs?

        return response

    @staticmethod
    def parse_cred(cred):
        if ":" not in cred:
            return False

        sep = cred.index(":")
        new_cred = ["", ""]
        new_cred[0] = cred[:sep].strip("'").strip('"')
        if "@" not in new_cred[0]:
            return False

        new_cred[1] = cred[sep + 1:]
        return new_cred
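    # Usage sketch (illustrative values, not from the source): credentials arrive
    # as a single "email:password" string and the email part must contain "@".
    #   SafariBooks.parse_cred("user@example.com:s3cret")  ->  ["user@example.com", "s3cret"]
    #   SafariBooks.parse_cred("not-an-email:s3cret")      ->  False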

    def do_login(self, email, password):
        response = self.requests_provider(self.LOGIN_ENTRY_URL)
        if response == 0:
            self.display.exit("Login: unable to reach Safari Books Online. Try again...")

        redirect_uri = response.request.path_url[response.request.path_url.index("redirect_uri"):]  # TODO try...catch
        redirect_uri = redirect_uri[:redirect_uri.index("&")]
        redirect_uri = "https://api.oreilly.com%2Fapi%2Fv1%2Fauth%2Fopenid%2Fauthorize%3F" + redirect_uri

        response = self.requests_provider(
            self.LOGIN_URL,
            post=True,
            json={
                "email": email,
                "password": password,
                "redirect_uri": redirect_uri
            },
            perform_redirect=False
        )

        if response == 0:
            self.display.exit("Login: unable to perform auth to Safari Books Online.\n    Try again...")

        if response.status_code != 200:  # TODO To be reviewed
            try:
                error_page = html.fromstring(response.text)
                errors_message = error_page.xpath("//ul[@class='errorlist']//li/text()")
                recaptcha = error_page.xpath("//div[@class='g-recaptcha']")
                messages = (["    `%s`" % error for error in errors_message
                             if "password" in error or "email" in error] if len(errors_message) else []) + \
                           (["    `ReCaptcha required (wait or do logout from the website).`"]
                            if len(recaptcha) else [])
                self.display.exit("Login: unable to perform auth login to Safari Books Online.\n" +
                                  self.display.SH_YELLOW + "[*]" + self.display.SH_DEFAULT + " Details:\n"
                                  "%s" % "\n".join(messages if len(messages) else ["    Unexpected error!"]))
            except (html.etree.ParseError, html.etree.ParserError) as parsing_error:
                self.display.error(parsing_error)
                self.display.exit(
                    "Login: the login failed, and an error occurred while parsing"
                    " the login response from Safari Books Online. Try again..."
                )

        self.jwt = response.json()  # TODO: save JWT Tokens and use the refresh_token to restore user session
        response = self.requests_provider(self.jwt["redirect_uri"])
        if response == 0:
            self.display.exit("Login: unable to reach Safari Books Online. Try again...")

    def get_book_info(self):
        response = self.requests_provider(self.api_url)
        if response == 0:
            self.display.exit("API: unable to retrieve book info.")

        response = response.json()
        if not isinstance(response, dict) or len(response.keys()) == 1:
            self.display.exit(self.display.api_error(response))

        if "last_chapter_read" in response:
            del response["last_chapter_read"]

        return response

    def get_book_chapters(self, page=1):
        response = self.requests_provider(urljoin(self.api_url, "chapter/?page=%s" % page))
        if response == 0:
            self.display.exit("API: unable to retrieve book chapters.")

        response = response.json()

        if not isinstance(response, dict) or len(response.keys()) == 1:
            self.display.exit(self.display.api_error(response))

        if "results" not in response or not len(response["results"]):
            self.display.exit("API: unable to retrieve book chapters.")

        if response["count"] > sys.getrecursionlimit():
            sys.setrecursionlimit(response["count"])

        result = []
        result.extend([c for c in response["results"] if "cover" in c["filename"] or "cover" in c["title"]])
        for c in result:
            del response["results"][response["results"].index(c)]

        result += response["results"]
        return result + (self.get_book_chapters(page + 1) if response["next"] else [])

    def get_default_cover(self):
        response = self.requests_provider(self.book_info["cover"], update_cookies=False, stream=True)
        if response == 0:
            self.display.error("Error trying to retrieve the cover: %s" % self.book_info["cover"])
            return False

        file_ext = response.headers["Content-Type"].split("/")[-1]
        with open(os.path.join(self.images_path, "default_cover." + file_ext), 'wb') as i:
            for chunk in response.iter_content(1024):
                i.write(chunk)

        return "default_cover." + file_ext

    def get_html(self, url):
        response = self.requests_provider(url)
        if response == 0 or response.status_code != 200:
            self.display.exit(
                "Crawler: error trying to retrieve this page: %s (%s)\n    From: %s" %
                (self.filename, self.chapter_title, url)
            )

        root = None
        try:
            root = html.fromstring(response.text, base_url=SAFARI_BASE_URL)

        except (html.etree.ParseError, html.etree.ParserError) as parsing_error:
            self.display.error(parsing_error)
            self.display.exit(
                "Crawler: error trying to parse this page: %s (%s)\n    From: %s" %
                (self.filename, self.chapter_title, url)
            )

        return root

    @staticmethod
    def url_is_absolute(url):
        return bool(urlparse(url).netloc)

    def link_replace(self, link):
        if link:
            if not self.url_is_absolute(link):
                if "cover" in link or "images" in link or "graphics" in link or \
                        link[-3:] in ["jpg", "peg", "png", "gif"]:
                    link = urljoin(self.base_url, link)
                    if link not in self.images:
                        self.images.append(link)
                        self.display.log("Crawler: found a new image at %s" % link)

                    image = link.split("/")[-1]
                    return "Images/" + image

                return link.replace(".html", ".xhtml")

            else:
                if self.book_id in link:
                    return self.link_replace(link.split(self.book_id)[-1])

        return link

    @staticmethod
    def get_cover(html_root):
        lowercase_ns = etree.FunctionNamespace(None)
        lowercase_ns["lower-case"] = lambda _, n: n[0].lower() if n and len(n) else ""

        images = html_root.xpath("//img[contains(lower-case(@id), 'cover') or contains(lower-case(@class), 'cover') or"
                                 "contains(lower-case(@name), 'cover') or contains(lower-case(@src), 'cover') or"
                                 "contains(lower-case(@alt), 'cover')]")
        if len(images):
            return images[0]

        divs = html_root.xpath("//div[contains(lower-case(@id), 'cover') or contains(lower-case(@class), 'cover') or"
                               "contains(lower-case(@name), 'cover') or contains(lower-case(@src), 'cover')]//img")
        if len(divs):
            return divs[0]

        a = html_root.xpath("//a[contains(lower-case(@id), 'cover') or contains(lower-case(@class), 'cover') or"
                            "contains(lower-case(@name), 'cover') or contains(lower-case(@src), 'cover')]//img")
        if len(a):
            return a[0]

        return None

    def parse_html(self, root, first_page=False):
        if random() > 0.8:
            if len(root.xpath("//div[@class='controls']/a/text()")):
                self.display.exit(self.display.api_error(" "))

        book_content = root.xpath("//div[@id='sbo-rt-content']")
        if not len(book_content):
            self.display.exit(
                "Parser: book content's corrupted or not present: %s (%s)" %
                (self.filename, self.chapter_title)
            )

        page_css = ""
        stylesheet_links = root.xpath("//link[@rel='stylesheet']")
        if len(stylesheet_links):
            for s in stylesheet_links:
                css_url = urljoin("https:", s.attrib["href"]) if s.attrib["href"][:2] == "//" \
                    else urljoin(self.base_url, s.attrib["href"])

                if css_url not in self.css:
                    self.css.append(css_url)
                    self.display.log("Crawler: found a new CSS at %s" % css_url)

                # Index into the global CSS list so this link matches the file name
                # used by _thread_download_css (Style00.css, Style01.css, ...).
                page_css += "<link href=\"Styles/Style{0:0>2}.css\" " \
                            "rel=\"stylesheet\" type=\"text/css\" />\n".format(self.css.index(css_url))

        stylesheets = root.xpath("//style")
        if len(stylesheets):
            for css in stylesheets:
                if "data-template" in css.attrib and len(css.attrib["data-template"]):
                    css.text = css.attrib["data-template"]
                    del css.attrib["data-template"]

                try:
                    page_css += html.tostring(css, method="xml", encoding='unicode') + "\n"

                except (html.etree.ParseError, html.etree.ParserError) as parsing_error:
                    self.display.error(parsing_error)
                    self.display.exit(
                        "Parser: error trying to parse one CSS found in this page: %s (%s)" %
                        (self.filename, self.chapter_title)
                    )

        # TODO: add all not covered tag for `link_replace` function
        svg_image_tags = root.xpath("//image")
        if len(svg_image_tags):
            for img in svg_image_tags:
                image_attr_href = [x for x in img.attrib.keys() if "href" in x]
                if len(image_attr_href):
                    svg_url = img.attrib.get(image_attr_href[0])
                    svg_root = img.getparent().getparent()
                    new_img = svg_root.makeelement("img")
                    new_img.attrib.update({"src": svg_url})
                    svg_root.remove(img.getparent())
                    svg_root.append(new_img)

        book_content = book_content[0]
        book_content.rewrite_links(self.link_replace)

        xhtml = None
        try:
            if first_page:
                is_cover = self.get_cover(book_content)
                if is_cover is not None:
                    page_css = "<style>" \
                               "body{display:table;position:absolute;margin:0!important;height:100%;width:100%;}" \
                               "#Cover{display:table-cell;vertical-align:middle;text-align:center;}" \
                               "img{height:90vh;margin-left:auto;margin-right:auto;}" \
                               "</style>"
                    cover_html = html.fromstring("<div id=\"Cover\"></div>")
                    cover_div = cover_html.xpath("//div")[0]
                    cover_img = cover_div.makeelement("img")
                    cover_img.attrib.update({"src": is_cover.attrib["src"]})
                    cover_div.append(cover_img)
                    book_content = cover_html

                    self.cover = is_cover.attrib["src"]

            xhtml = html.tostring(book_content, method="xml", encoding='unicode')

        except (html.etree.ParseError, html.etree.ParserError) as parsing_error:
            self.display.error(parsing_error)
            self.display.exit(
                "Parser: error trying to parse HTML of this page: %s (%s)" %
                (self.filename, self.chapter_title)
            )

        return page_css, xhtml

    @staticmethod
    def escape_dirname(dirname, clean_space=False):
        if ":" in dirname:
            if dirname.index(":") > 15:
                dirname = dirname.split(":")[0]

            elif "win" in sys.platform:
                dirname = dirname.replace(":", ",")

        for ch in ['~', '#', '%', '&', '*', '{', '}', '\\', '<', '>', '?', '/', '`', '\'', '"', '|', '+']:
            if ch in dirname:
                dirname = dirname.replace(ch, "_")

        return dirname if not clean_space else dirname.replace(" ", "")

    def create_dirs(self):
        if os.path.isdir(self.BOOK_PATH):
            self.display.log("Book directory already exists: %s" % self.BOOK_PATH)

        else:
            os.makedirs(self.BOOK_PATH)

        oebps = os.path.join(self.BOOK_PATH, "OEBPS")
        if not os.path.isdir(oebps):
            self.display.book_ad_info = True
            os.makedirs(oebps)

        self.css_path = os.path.join(oebps, "Styles")
        if os.path.isdir(self.css_path):
            self.display.log("CSSs directory already exists: %s" % self.css_path)

        else:
            os.makedirs(self.css_path)
            self.display.css_ad_info.value = 1

        self.images_path = os.path.join(oebps, "Images")
        if os.path.isdir(self.images_path):
            self.display.log("Images directory already exists: %s" % self.images_path)

        else:
            os.makedirs(self.images_path)
            self.display.images_ad_info.value = 1

    def save_page_html(self, contents):
        self.filename = self.filename.replace(".html", ".xhtml")
        with open(os.path.join(self.BOOK_PATH, "OEBPS", self.filename), "wb") as page_file:
            page_file.write(self.BASE_HTML.format(contents[0], contents[1]).encode("utf-8", "xmlcharrefreplace"))
        self.display.log("Created: %s" % self.filename)

    def get(self):
        len_books = len(self.book_chapters)

        for _ in range(len_books):
            if not len(self.chapters_queue):
                return

            first_page = len_books == len(self.chapters_queue)

            next_chapter = self.chapters_queue.pop(0)
            self.chapter_title = next_chapter["title"]
            self.filename = next_chapter["filename"]

            if os.path.isfile(os.path.join(self.BOOK_PATH, "OEBPS", self.filename.replace(".html", ".xhtml"))):
                if not self.display.book_ad_info and \
                        next_chapter not in self.book_chapters[:self.book_chapters.index(next_chapter)]:
                    self.display.info(
                        "File `%s` already exists.\n"
                        "    If you want to download again all the book%s,\n"
                        "    please delete the `<BOOK NAME>/OEBPS/*.xhtml` files and restart the program." %
                        (
                            self.filename.replace(".html", ".xhtml"),
                            " (especially because you selected the `--no-kindle` option)"
                            if self.args.no_kindle else ""
                        )
                    )
                    self.display.book_ad_info = 2

            else:
                self.save_page_html(self.parse_html(self.get_html(next_chapter["web_url"]), first_page))

            self.display.state(len_books, len_books - len(self.chapters_queue))

    def _thread_download_css(self, url):
        css_file = os.path.join(self.css_path, "Style{0:0>2}.css".format(self.css.index(url)))
        if os.path.isfile(css_file):
            if not self.display.css_ad_info.value and url not in self.css[:self.css.index(url)]:
                self.display.info("File `%s` already exists.\n"
                                  "    If you want to download again all the CSSs,\n"
                                  "    please delete the `<BOOK NAME>/OEBPS/*.xhtml` and `<BOOK NAME>/OEBPS/Styles/*`"
                                  " files and restart the program." %
                                  css_file)
                self.display.css_ad_info.value = 1

        else:
            response = self.requests_provider(url, update_cookies=False)
            if response == 0:
                self.display.error("Error trying to retrieve this CSS: %s\n    From: %s" % (css_file, url))

            else:
                with open(css_file, 'wb') as s:
                    s.write(response.content)

        self.css_done_queue.put(1)
        self.display.state(len(self.css), self.css_done_queue.qsize())

    def _thread_download_images(self, url):
        image_name = url.split("/")[-1]
        image_path = os.path.join(self.images_path, image_name)
        if os.path.isfile(image_path):
            if not self.display.images_ad_info.value and url not in self.images[:self.images.index(url)]:
                self.display.info("File `%s` already exists.\n"
                                  "    If you want to download again all the images,\n"
                                  "    please delete the `<BOOK NAME>/OEBPS/*.xhtml` and `<BOOK NAME>/OEBPS/Images/*`"
                                  " files and restart the program." %
                                  image_name)
                self.display.images_ad_info.value = 1

        else:
            response = self.requests_provider(urljoin(SAFARI_BASE_URL, url),
                                              update_cookies=False,
                                              stream=True)
            if response == 0:
                self.display.error("Error trying to retrieve this image: %s\n    From: %s" % (image_name, url))

            else:
                with open(image_path, 'wb') as img:
                    for chunk in response.iter_content(1024):
                        img.write(chunk)

        self.images_done_queue.put(1)
        self.display.state(len(self.images), self.images_done_queue.qsize())

    def _start_multiprocessing(self, operation, full_queue):
        if len(full_queue) > 5:
            # Cap concurrency: recurse on chunks so at most 5 processes run at once.
            for i in range(0, len(full_queue), 5):
                self._start_multiprocessing(operation, full_queue[i:i + 5])

        else:
            process_queue = [Process(target=operation, args=(arg,)) for arg in full_queue]
            for proc in process_queue:
                proc.start()

            for proc in process_queue:
                proc.join()

    def collect_css(self):
        self.display.state_status.value = -1

        if "win" in sys.platform:
            # TODO
            for css_url in self.css:
                self._thread_download_css(css_url)

        else:
            self._start_multiprocessing(self._thread_download_css, self.css)

    def collect_images(self):
        if self.display.book_ad_info == 2:
            self.display.info("Some of the book contents were already downloaded.\n"
                              "    If you want to be sure that all the images will be downloaded,\n"
                              "    please delete the `<BOOK NAME>/OEBPS/*.xhtml` files and restart the program.")

        self.display.state_status.value = -1

        if "win" in sys.platform:
            # TODO
            for image_url in self.images:
                self._thread_download_images(image_url)

        else:
            self._start_multiprocessing(self._thread_download_images, self.images)

    def create_content_opf(self):
        self.css = next(os.walk(self.css_path))[2]
        self.images = next(os.walk(self.images_path))[2]

        manifest = []
        spine = []
        for c in self.book_chapters:
            c["filename"] = c["filename"].replace(".html", ".xhtml")
            item_id = escape("".join(c["filename"].split(".")[:-1]))
            manifest.append("<item id=\"{0}\" href=\"{1}\" media-type=\"application/xhtml+xml\" />".format(
                item_id, c["filename"]
            ))
            spine.append("<itemref idref=\"{0}\"/>".format(item_id))

        for i in set(self.images):
            dot_split = i.split(".")
            head = "img_" + escape("".join(dot_split[:-1]))
            extension = dot_split[-1]
            manifest.append("<item id=\"{0}\" href=\"Images/{1}\" media-type=\"image/{2}\" />".format(
                head, i, "jpeg" if "jp" in extension else extension
            ))

        for i in range(len(self.css)):
            manifest.append("<item id=\"style_{0:0>2}\" href=\"Styles/Style{0:0>2}.css\" "
                            "media-type=\"text/css\" />".format(i))

        authors = "\n".join("<dc:creator opf:file-as=\"{0}\" opf:role=\"aut\">{0}</dc:creator>".format(
            escape(aut["name"])
        ) for aut in self.book_info["authors"])

        subjects = "\n".join("<dc:subject>{0}</dc:subject>".format(escape(sub["name"]))
                             for sub in self.book_info["subjects"])

        return self.CONTENT_OPF.format(
            (self.book_info["isbn"] if self.book_info["isbn"] else self.book_id),
            escape(self.book_title),
            authors,
            escape(self.book_info["description"]),
            subjects,
            ", ".join(escape(pub["name"]) for pub in self.book_info["publishers"]),
            escape(self.book_info["rights"]),
            self.book_info["issued"],
            self.cover,
            "\n".join(manifest),
            "\n".join(spine),
            self.book_chapters[0]["filename"].replace(".html", ".xhtml")
        )

    @staticmethod
    def parse_toc(l, c=0, mx=0):
        r = ""
        for cc in l:
            c += 1
            if int(cc["depth"]) > mx:
                mx = int(cc["depth"])

            r += "<navPoint id=\"{0}\" playOrder=\"{1}\">" \
                 "<navLabel><text>{2}</text></navLabel>" \
                 "<content src=\"{3}\"/>".format(
                    cc["fragment"] if len(cc["fragment"]) else cc["id"], c,
                    escape(cc["label"]), cc["href"].replace(".html", ".xhtml").split("/")[-1]
                 )

            if cc["children"]:
                sr, c, mx = SafariBooks.parse_toc(cc["children"], c, mx)
                r += sr

            r += "</navPoint>\n"

        return r, c, mx

    def create_toc(self):
        response = self.requests_provider(urljoin(self.api_url, "toc/"))
        if response == 0:
            self.display.exit("API: unable to retrieve book chapters. "
                              "Don't delete any files, just run again this program"
                              " in order to complete the `.epub` creation!")

        response = response.json()

        if not isinstance(response, list) and len(response.keys()) == 1:
            self.display.exit(
                self.display.api_error(response) +
                " Don't delete any files, just run again this program"
                " in order to complete the `.epub` creation!"
            )

        navmap, _, max_depth = self.parse_toc(response)
        return self.TOC_NCX.format(
            (self.book_info["isbn"] if self.book_info["isbn"] else self.book_id),
            max_depth,
            self.book_title,
            ", ".join(aut["name"] for aut in self.book_info["authors"]),
            navmap
        )

    def create_epub(self):
        with open(os.path.join(self.BOOK_PATH, "mimetype"), "w") as mimetype_file:
            mimetype_file.write("application/epub+zip")
        meta_info = os.path.join(self.BOOK_PATH, "META-INF")
        if os.path.isdir(meta_info):
            self.display.log("META-INF directory already exists: %s" % meta_info)

        else:
            os.makedirs(meta_info)

        with open(os.path.join(meta_info, "container.xml"), "wb") as container_file:
            container_file.write(self.CONTAINER_XML.encode("utf-8", "xmlcharrefreplace"))
        with open(os.path.join(self.BOOK_PATH, "OEBPS", "content.opf"), "wb") as opf_file:
            opf_file.write(self.create_content_opf().encode("utf-8", "xmlcharrefreplace"))
        with open(os.path.join(self.BOOK_PATH, "OEBPS", "toc.ncx"), "wb") as toc_file:
            toc_file.write(self.create_toc().encode("utf-8", "xmlcharrefreplace"))

        zip_file = os.path.join(PATH, "Books", self.book_id)
        if os.path.isfile(zip_file + ".zip"):
            os.remove(zip_file + ".zip")

        shutil.make_archive(zip_file, 'zip', self.BOOK_PATH)
        os.rename(zip_file + ".zip", os.path.join(self.BOOK_PATH, self.book_id) + ".epub")
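# Packaging caveat (sketch, not from the original source): shutil.make_archive
# compresses every entry, whereas the EPUB container spec expects "mimetype" to
# be the archive's first entry, stored uncompressed. Most readers tolerate the
# difference; a stricter packer could look like this:
import os
import zipfile

def pack_epub(book_path, epub_file):
    """Zip book_path into epub_file with "mimetype" first and uncompressed."""
    with zipfile.ZipFile(epub_file, "w") as z:
        z.write(os.path.join(book_path, "mimetype"), "mimetype",
                compress_type=zipfile.ZIP_STORED)
        for root, _, files in os.walk(book_path):
            for name in files:
                full_path = os.path.join(root, name)
                arcname = os.path.relpath(full_path, book_path)
                if arcname == "mimetype":
                    continue  # already written first, uncompressed
                z.write(full_path, arcname, compress_type=zipfile.ZIP_DEFLATED)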
Ejemplo n.º 36
0
    item['coverImage'] = html_element.xpath(
        '//div[@class="detail_left fn-left"]/img/@data-original')[0]

    with open('shijijiyua.json', 'w') as file:
        json_str = json.dumps(item, ensure_ascii=False) + '\n'
        file.write(json_str)


if __name__ == '__main__':

    # Create the task queue
    taskQueue = Queue()

    # Seed the starting tasks
    taskQueue.put(
        'http://date.jiayuan.com/eventslist_new.php?page=1&city_id=4201&shop_id=33'
    )
    taskQueue.put(
        'http://date.jiayuan.com/eventslist_new.php?page=1&city_id=31&shop_id=15'
    )
    taskQueue.put(
        'http://date.jiayuan.com/eventslist_new.php?page=1&city_id=3702&shop_id=42'
    )
    taskQueue.put(
        'http://date.jiayuan.com/eventslist_new.php?page=1&city_id=50&shop_id=5'
    )

    # Create the results queue
    dataQueue = Queue()

    # Spawn processes to crawl the tasks
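    # The example is truncated here in the source. A typical continuation,
    # following the Process/Queue pattern used in the other examples
    # (worker() and the process count are assumptions):
    #   for _ in range(4):
    #       Process(target=worker, args=(taskQueue, dataQueue)).start()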
Ejemplo n.º 37
0
class Viewer(object):
    def __init__(self):
        self.image_queue = Queue()
        self.pose_queue = Queue()

        self.view_thread = Process(target=self.view)
        self.view_thread.start()

    def update_pose(self, pose):
        if pose is None:
            return
        self.pose_queue.put(pose.matrix())

    def update_image(self, image):
        if image is None:
            return
        elif image.ndim == 2:
            # Promote grayscale frames to 3 channels so they can be drawn as RGB.
            image = np.repeat(image[..., np.newaxis], 3, axis=2)
        self.image_queue.put(image)

    def view(self):
        pangolin.CreateWindowAndBind('Viewer', 1024, 768)
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glEnable(gl.GL_BLEND)
        gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)

        viewpoint_x = 0
        viewpoint_y = -7
        viewpoint_z = -18
        viewpoint_f = 1000

        proj = pangolin.ProjectionMatrix(1024, 768, viewpoint_f, viewpoint_f,
                                         512, 389, 0.1, 300)
        look_view = pangolin.ModelViewLookAt(viewpoint_x, viewpoint_y,
                                             viewpoint_z, 0, 0, 0, 0, -1, 0)

        # Camera Render Object (for view / scene browsing)
        scam = pangolin.OpenGlRenderState(proj, look_view)

        # Add named OpenGL viewport to window and provide 3D Handler
        dcam = pangolin.CreateDisplay()
        dcam.SetBounds(0.0, 1.0, 175 / 1024., 1.0, -1024 / 768.)
        dcam.SetHandler(pangolin.Handler3D(scam))

        # image
        width, height = 376, 240
        dimg = pangolin.Display('image')
        dimg.SetBounds(0, height / 768., 0.0, width / 1024., 1024 / 768.)
        dimg.SetLock(pangolin.Lock.LockLeft, pangolin.Lock.LockTop)

        texture = pangolin.GlTexture(width, height, gl.GL_RGB, False, 0,
                                     gl.GL_RGB, gl.GL_UNSIGNED_BYTE)

        # axis
        axis = pangolin.Renderable()
        axis.Add(pangolin.Axis())

        trajectory = DynamicArray()
        camera = None
        image = None

        while not pangolin.ShouldQuit():
            if not self.pose_queue.empty():
                while not self.pose_queue.empty():  # drain: keep only the newest pose
                    pose = self.pose_queue.get()
                trajectory.append(pose[:3, 3])
                camera = pose.T

            if not self.image_queue.empty():
                while not self.image_queue.empty():  # drain: keep only the newest frame
                    img = self.image_queue.get()
                img = img[::-1, :, ::-1]  # flip vertically and swap BGR <-> RGB
                img = cv2.resize(img, (width, height))
                image = img.copy()

            gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
            gl.glClearColor(1.0, 1.0, 1.0, 1.0)
            dcam.Activate(scam)

            # draw axis
            axis.Render()

            # draw current camera
            if camera is not None:
                gl.glLineWidth(1)
                gl.glColor3f(0.0, 0.0, 1.0)
                pangolin.DrawCameras(np.array([camera]), 0.5)

            # show trajectory
            if len(trajectory) > 0:
                gl.glPointSize(2)
                gl.glColor3f(0.0, 0.0, 0.0)
                pangolin.DrawPoints(trajectory.array())

            # show image
            if image is not None:
                texture.Upload(image, gl.GL_RGB, gl.GL_UNSIGNED_BYTE)
                dimg.Activate()
                gl.glColor3f(1.0, 1.0, 1.0)
                texture.RenderToViewport()

            pangolin.FinishFrame()
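# Usage sketch (assumptions, not from the source: `current_pose` is a
# hypothetical object exposing .matrix() -> 4x4 numpy array, as update_pose()
# expects, and frames are HxWx3 uint8 arrays):
#   viewer = Viewer()
#   viewer.update_image(np.zeros((240, 376, 3), dtype=np.uint8))
#   viewer.update_pose(current_pose)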