Example #1
def getlocals(steamhome, logindata, gamedata, localdata):
    logindata["configdata"] = {}
    for root, subFolders, files in os.walk(os.path.join(steamhome["path"], "userdata", logindata["dir"])):
        for file in files:
            if file.lower().endswith("config.vdf"):
                # os.walk already yields the full directory in `root`, so join only root and file
                with open(os.path.join(root, file)) as vdf_file:
                    vdfdata = utils.vdf.load(vdf_file)
                logindata["configdata"] = utils.merge(logindata["configdata"], vdfdata)

    def getnewgamedata(appid, name):
        ret = {"appid": appid, "name": name}
        if int(appid) <= 999999:  # not a shortcut
            ret["firstseen"] = int(time.time())
        return ret

    purchaseddata = purchased.getpurchased(logindata)

    localdb = utils.merge(shortcuts.getshortcuts(steamhome, logindata), apppackages.getappinfo(steamhome, logindata))
    localdata.clear()
    for g in localdb:
        if "data-isdlc" not in localdb[g]:
            localdata[g] = localdb[g]
            if g not in gamedata:
                gamedata[g] = getnewgamedata(g, localdb[g]["name"])
            if "data-packageid" in localdb[g] and localdb[g]["data-packageid"] in purchaseddata:
                gamedata[g]["firstseen"] = purchaseddata[localdb[g]["data-packageid"]]
Example #2
 def _merge_sort(a, left, right):
     if right - left < 2:
         return
     pivot = (right + left) // 2
     _merge_sort(a, left, pivot)
     _merge_sort(a, pivot, right)
     merge(a, left, pivot, right)
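This in-place variant relies on a merge(a, left, pivot, right) helper that the example does not include. A minimal sketch under the assumption that it merges the sorted runs a[left:pivot] and a[pivot:right] back into a:

def merge(a, left, pivot, right):
    # Merge the two sorted runs a[left:pivot] and a[pivot:right] in place.
    # Sketch only; the helper the example actually uses is not shown.
    merged = []
    i, j = left, pivot
    while i < pivot and j < right:
        if a[i] <= a[j]:
            merged.append(a[i])
            i += 1
        else:
            merged.append(a[j])
            j += 1
    merged.extend(a[i:pivot])
    merged.extend(a[j:right])
    a[left:right] = merged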
Example #3
    def select_group(self, group):
        if self.__group == group:
            return

        if group:
            groups = [ group ] + [ g for g in self.groups if g != group ]
        else:
            groups = self.groups

        # clear dict and only keep some values we want unchanged
        if not self.__base_dict:
            self.__base_dict = self.__dict__.copy()
        else:
            self.__dict__ = self.__base_dict.copy()

        # updating
        for group_ in groups:
            group_.select_group(None)
            if group_.handlers:
                merge(self.handlers, group_.handlers)
            self.__inherits(self.__dict__, group_.__dict__)

        # some values that we must reset to their original state
        for key in ('synctrex', 'group', 'groups', 'children'):
            if key in self.__base_dict:
                setattr(self, key, self.__base_dict[key])
            elif hasattr(self, key):
                delattr(self, key)

        self.__group = group
Example #4
def mkdumpdb():
    dumpdb = {
        "name64": logindata["id64"],
        "name": logindata["dir"],
        "user": logindata["name"]
    }
    if "avatar" in logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]:
        dumpdb["avatar"] = logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]["avatar"]
    dumpdb["gamelist"] = {}
    for db in dbs:
        dbo = copy.deepcopy(db)
        utils.merge(dumpdb["gamelist"], dbo)
    return dumpdb
Example #5
    def test(self):
        print('patch-based testing')
        # load new images from a folder
        try:
            self.load(self.config.checkpoint_dir)
            print(" [*] Load SUCCESS")
        except:
            print(" [!] Load failed...")
            return

        print('new_data_folder',self.config.new_image_path)

        nxny_list,namelist=input_setup_test(self.sess,self.config)
        new_data_dir = os.path.join(self.config.checkpoint_dir,'new.c'+str(self.config.c_dim)+'.h5')
        X_test,_=read_data(new_data_dir)
        tst_data_loader=dataLoader(dataSize=X_test.shape[0],
                                   batchSize=self.config.test_batch_size,
                                   shuffle=False)
        tst_batch_count=int(math.ceil(X_test.shape[0]/self.config.test_batch_size))
        #print(X_test[0].shape)
        #print(X_test[1].shape)
        #new_data_loader=tf.data.Dataset.from_tensor_slices(X_test)
        #new_data_loader = new_data_loader.batch(batch_size=self.config.test_batch_size)
        #iterator = tf.data.Iterator.from_structure(new_data_loader.output_types,new_data_loader.output_shapes)
        #next_batch=iterator.get_next()
        #new_init_op = iterator.make_initializer(new_data_loader)
        
        result=list()
        #self.sess.run(new_init_op)
        start_time=time.time()
        for batch in range(tst_batch_count):
            inx=tst_data_loader.get_batch()
            X=X_test[inx].view()#self.sess.run(next_batch)
            y_pred = self.pred.eval({self.images: X})
            result.append(y_pred)
                #total_mse+=tf.reduce_mean(tf.squared_difference(y_pred, y))
                #batch_count+=1

        #averge_mse=total_mse/batch_count
        #PSNR=-10*math.log10(averge_mse)
        print("time: [%4.2f]" % (time.time()-start_time))
        
        #save
            #flatten
        print(len(result))
        output=list()
        for i in result:
            for j in range(i.shape[0]):
                output.append(i[j])
        print(len(output))
        print(output[0].shape)
        
        #result=[self.sess.run(i) for i in result]
        patch_inx=0
        for i in range(len(nxny_list)):
            nx,ny=nxny_list[i]
            img=merge(output[patch_inx:(patch_inx+nx*ny)],(nx,ny))
            print('img shape@',i,img.shape)
            patch_inx+=nx*ny
            imsave(img,namelist[i].replace('.bmp','.bmp.c'+str(self.config.c_dim)))
Example #6
def predict(override_cfg, model_dir):
  """Run model over a dataset and dump predictions to json file."""
  assert FLAGS.predict_path
  cfg = _load_config(model_dir)
  cfg = utils.merge(cfg, override_cfg)
  input_fn = data.get_input_fn(
      split=cfg.dataset.eval_split,
      max_length=None,
      repeat=False,
      shuffle=False,
      cache=False,
      limit=None,
      data_path=cfg.dataset.data_path,
      vocab_path=cfg.dataset.vocab_path,
      is_tpu=False,
      use_generator=True,
      is_training=False)
  estimator = model.get_estimator(**cfg)
  predictions = dict()
  for i, prediction in enumerate(estimator.predict(input_fn)):
    predictions[prediction["id"]] = prediction["answer"]
    if i % 100 == 0:
      tf.logging.info("Prediction %s | %s: %s" % (i, prediction["id"],
                                                  prediction["answer"]))

  # Dump results to a file
  with tf.gfile.GFile(FLAGS.predict_path, "w") as f:
    json.dump(predictions, f)
Example #7
    def get_base_dataset_simclr(self):
        base_dataset = torchvision.datasets.ImageFolder(
            self.train_path, transform=None
        )

        '''
        if self.oversampling:
            base_indices = oversampling_indices(np.array(list(range(len(base_dataset)))),
                                                np.array(base_dataset.targets))
        else:
            base_indices = np.array(list(range(len(base_dataset))))

        '''

        if self.merged and len(self.merge_classes) > 0:
            base_dataset = merge(base_dataset, self.merge_classes)

        if self.remove_classes and len(self.classes_to_remove) > 0:
            base_dataset = remove(base_dataset, self.classes_to_remove)

        base_indices = np.array(list(range(len(base_dataset))))
        base_dataset = WeaklySupervisedDataset(base_dataset, base_indices, transform=self.transform_simclr,
                                               mean=self.isic_mean, std=self.isic_std)

        return base_dataset
Example #8
def document_info_for(filename, cache, options):
  mods_url = mods_for(filename)
  mods_cache = ""
  body = utils.download(mods_url, 
    cache,
    utils.merge(options, {'xml': True})
  )

  doc = etree.fromstring(body)
  mods_ns = {"mods": "http://www.loc.gov/mods/v3"}

  locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

  urls = {}
  for location in locations:
    label = location.attrib['displayLabel']
    if "HTML" in label:
      format = "html"
    elif "PDF" in label:
      format = "pdf"
    elif "XML" in label:
      format = "xml"
    else:
      format = "unknown"
    urls[format] = location.text

  issued_on = doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns)

  return issued_on, urls
Example #9
def evaluate(override_cfg, model_dir, continuous=True):
  """Run training and evaluation."""
  tf.logging.info("model_dir = " + model_dir)
  try:
    cfg = _load_config(model_dir)
  except tf.errors.NotFoundError:
    tf.logging.info("Model directory does not exist yet. Creating new config.")
    cfg = model.build_config(model_dir=model_dir, data_path=FLAGS.data_path)
  tf.logging.info(cfg)
  tf.logging.info(override_cfg)
  cfg = utils.merge(cfg, override_cfg)

  cfg.tpu.enable = False
  cfg.dataset.max_length = None

  # Construct inputs and estimator
  _, eval_input = data.build_dataset(cfg.dataset, is_tpu=cfg.tpu.enable)
  estimator = model.get_estimator(**cfg)
  if continuous:
    checkpoints_iterator = contrib_training.checkpoints_iterator(cfg.model_dir)
    eval_metrics = None
    for ckpt_path in checkpoints_iterator:
      eval_metrics = estimator.evaluate(
          input_fn=eval_input, checkpoint_path=ckpt_path)
      tf.logging.info(pprint.pformat(eval_metrics))
    return eval_metrics
  else:
    eval_metrics = estimator.evaluate(input_fn=eval_input)
    return eval_metrics
Example #10
    def add(self, words):
        # logging.info("add {}".format(words))
        if len(words) in [UNIGRAM, BIGRAM, TRIGRAM]:
            self.counter.add(utils.merge(words))

        else:
            raise TypeError("Only support unigram, bigram, trigram")
Example #11
def eval(model, name, sample_shape=[4, 4], load_all_ckpt=True):
    if name is None:
        name = model.name
    dir_name = 'eval/' + name
    if tf.gfile.Exists(dir_name):
        tf.gfile.DeleteRecursively(dir_name)
    tf.gfile.MakeDirs(dir_name)

    # training=False => generator only
    restorer = tf.train.Saver(slim.get_model_variables())

    config = tf.ConfigProto()
    best_gpu = utils.get_best_gpu()
    config.gpu_options.visible_device_list = str(
        best_gpu)  # Works same as CUDA_VISIBLE_DEVICES!
    with tf.Session(config=config) as sess:
        ckpts = get_all_checkpoints('./checkpoints/' + name,
                                    force=load_all_ckpt)
        size = sample_shape[0] * sample_shape[1]

        z_ = sample_z([size, model.z_dim])

        for v in ckpts:
            print("Evaluating {} ...".format(v))
            restorer.restore(sess, v)
            global_step = int(v.split('/')[-1].split('-')[-1])

            fake_samples = sess.run(model.fake_sample, {model.z: z_})

            # inverse transform: [-1, 1] => [0, 1]
            fake_samples = (fake_samples + 1.) / 2.
            merged_samples = utils.merge(fake_samples, size=sample_shape)
            fn = "{:0>5d}.png".format(global_step)
            scipy.misc.imsave(os.path.join(dir_name, fn), merged_samples)
Example #12
def mirror_package_zipfile(collection, package_name, file_path, lastmod,
                           lastmod_cache, options):
    # Do we already have this file updated?
    if lastmod_cache.get("package") == lastmod:
        if not options.get("force", False):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(file_path) and options.get("cached", False):
        return

    # Download.
    file_url = GOVINFO_BASE_URL + "content/pkg/{}-{}.zip".format(
        collection, package_name)
    logging.warn("Downloading: " + file_path)
    data = utils.download(
        file_url,
        file_path,
        utils.merge(
            options,
            {
                'binary': True,
                'force': True,  # decision to cache was made above
                'to_cache': False,
                'needs_content': False,
            }))

    # Update the lastmod of the downloaded file.
    lastmod_cache['package'] = lastmod
    return True
Example #13
    def run_test(self):
        test_data, test_label = test_input_setup(self)

        print("Testing...")

        start_time = time.time()
        result = np.clip(
            self.pred.eval({
                self.images: test_data,
                self.labels: test_label,
                self.batch: 1
            }), 0, 1)
        passed = time.time() - start_time
        img1 = tf.convert_to_tensor(test_label, dtype=tf.float32)
        img2 = tf.convert_to_tensor(result, dtype=tf.float32)
        psnr = self.sess.run(tf.image.psnr(img1, img2, 1))
        ssim = self.sess.run(tf.image.ssim(img1, img2, 1))
        print("Took %.3f seconds, PSNR: %.6f, SSIM: %.6f" %
              (passed, psnr, ssim))

        result = merge(self, result)
        image_path = os.path.join(os.getcwd(), self.output_dir)
        image_path = os.path.join(image_path, "test_image.png")

        array_image_save(result, image_path)
Example #14
def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    page = utils.download(
        "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num),
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )

    if not page:
        logging.error("Couldn't download Senate vote XML index, aborting")
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)
    return vote_ids
Example #15
 def narrow_docids(self, idx):
   m0 = [ decode_array(idx[feat]) for feat in self.feats if idx.has_key(feat) ]
   if not m0:
     return []
   refs = merge(m0)
   locs = [ (refs[i], refs[i+1]) for i in xrange(0, len(refs), 2) ]
   return locs
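Here merge(m0) combines several decoded posting arrays, and the pair-wise unpacking into locs implies the result is a flat [docid1, sentid1, docid2, sentid2, ...] sequence (the comment in Examples #40 and #42 says the same). A possible sketch using heapq, assuming every input array is already sorted by (docid, sentid) and ignoring any deduplication the real helper may perform:

from heapq import merge as heap_merge

def merge(arrays):
    # Merge flat arrays of (docid, sentid) pairs into one flat, sorted array.
    # Sketch of an assumed helper; inputs are treated as already sorted.
    pairs = heap_merge(*(zip(a[0::2], a[1::2]) for a in arrays))
    flat = []
    for docid, sentid in pairs:
        flat.extend((docid, sentid))
    return flat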
Example #16
def mirror_package(sitemap, package_name, lastmod, content_detail_url, options):
    """Create a local mirror of a FDSys package."""

    # Return a list of files we downloaded.
    results = []

    if not options.get("granules", False):
        # Most packages are just a package. This is the usual case.
        results = mirror_package_or_granule(sitemap, package_name, None, lastmod, options)

    else:
        # In some collections, like STATUTE, each document has subparts which are not
        # described in the sitemap. Load the main HTML page and scrape for the sub-files.
        # In the STATUTE collection, the MODS information in granules is redundant with
        # information in the top-level package MODS file. But the only way to get granule-
        # level PDFs is to go through the granules.
        content_index = utils.download(content_detail_url,
                                       "fdsys/package/%s/%s/%s.html" % (sitemap["year"], sitemap["collection"], package_name),
                                       utils.merge(options, {
                                           'binary': True,
                                       }))
        if not content_index:
            raise Exception("Failed to download %s" % content_detail_url)
        for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"):
            if link.text == "More":
                m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href"))
                if not m or m.group(1) != package_name:
                    raise Exception("Unmatched granule URL %s" % link.get("href"))
                granule_name = m.group(2)
                results = mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options)

    return results
Example #17
 def append_mode(self, w):
     trans = self.initial_translation()
     trans = merge(self._word_dict[w]["cell"].comment.content, trans)
     if verify(w, trans):
         self._word_dict[w]["cell"].comment = Comment(text=trans,
                                                      author="Lee Mist")
         self.up_color_level(w)
Example #18
def run(options):
  amdt_id = options.get('amendment_id', None)
  
  search_state = { }

  if amdt_id:
    amdt_type, number, congress = utils.split_bill_id(amdt_id)
    to_fetch = [amdt_id]
  else:
    congress = options.get('congress', utils.current_congress())
    to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
    if not to_fetch:
      if options.get("fast", False):
        logging.warn("No amendments changed.")
      else:
        logging.error("Error figuring out which amendments to download, aborting.")
      return None

    limit = options.get('limit', None)
    if limit:
      to_fetch = to_fetch[:int(limit)]

  if options.get('pages_only', False):
    return None

  logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
  
  saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

  save_bill_search_state(saved_amendments, search_state)
Example #19
def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    url = "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num)
    page = utils.download(
        url,
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )

    if not page or "Requested Page Not Found (404)" in page:
        logging.error("Couldn't download Senate vote XML index %s, skipping" % url)
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)
    return vote_ids
Example #20
    def merge_file(self):
        if len(self.files) == 0:
            messagebox.showerror("Error Message",
                                 "You did not select source file")
            return

        self.output = merge(self.files)

        if self.output.empty:
            messagebox.showerror("Error Message", "Column Values Is Different")
            return
        else:
            f = filedialog.asksaveasfilename(defaultextension='.xlsx',
                                             filetypes=[("Default Excel file",
                                                         "*.xlsx"),
                                                        ("Excel file 97-2003",
                                                         "*.xls")])
            if not f.rsplit("/", 1)[1].endswith('.xlsx' or '.xls'):
                messagebox.showerror("Error Message",
                                     "Only xlsx and xls File Types")
                return

            try:
                writer = pd.ExcelWriter(f, engine='xlsxwriter')
                self.output.to_excel(writer)
                writer.save()
                self.listbox_list_of_files.delete(0, END)
                messagebox.showinfo("", "File Merge Completed")

            except Exception as e:
                messagebox.showerror('Awww', 'Unknown Error Occurred.')
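merge(self.files) returns an object with .empty and .to_excel, so it is presumably a pandas DataFrame built from the selected spreadsheets. A rough sketch under that assumption; the real helper most likely also verifies that the column sets match, which is what the "Column Values Is Different" branch suggests:

import pandas as pd

def merge(files):
    # Read every selected file and stack the rows into a single DataFrame.
    # Sketch only; assumes the inputs are Excel files readable by pandas.
    frames = [pd.read_excel(path) for path in files]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)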
Example #21
def save_image_to_memory(image):
    image = inverse_transform(image)
    image = merge(image, (1, 1))
    image = cv2.cvtColor(image.astype('uint8'), cv2.COLOR_RGB2BGR)

    is_success, buffer = cv2.imencode(".jpg", image)
    io_buf = io.BytesIO(buffer)
    return io_buf
Example #22
    def do_del(self, args):
        """Removes breakpoints. Usage is the same as 'b', but the selected breakpoints and breakpoint ranges are being
deleted this time."""
        code_targets = []
        mem_targets = []
        global mem_breakpoints
        if not args:
            breakpoints.clear()
            mem_breakpoints.clear()
            apicall_handler.pending_breakpoints.clear()
        for arg in args.split(" "):
            if not arg:
                continue
            if arg == "stack":
                mem_targets += [(STACK_ADDR, STACK_ADDR + STACK_SIZE)]
            elif "m" == arg[0]:
                try:
                    parts = list(map(lambda p: int(p, 0), arg[1:].split("-")))
                    if len(parts) == 1:
                        lower = upper = parts[0]
                    else:
                        lower = min(parts)
                        upper = max(parts)
                    mem_targets += [(lower, upper)]
                except ValueError:
                    print(f"Error parsing address or range {arg}")
            elif "$" == arg[0]:
                arg = arg[1:]
                if arg in apicall_handler.hooks.values():
                    for addr, func_name in apicall_handler.hooks.items():
                        if arg == func_name:
                            code_targets += [addr]
                            break
                elif arg in apicall_handler.pending_breakpoints:
                    apicall_handler.pending_breakpoints.remove(arg)
                else:
                    print(
                        f"Unknown method {arg}, not imported or used in pending breakpoint"
                    )
            else:
                try:
                    code_targets += [int(arg, 0)]
                except ValueError:
                    print(f"Error parsing address {arg}")
        with data_lock:
            for t in code_targets:
                try:
                    breakpoints.remove(t)
                except KeyError:
                    pass
            new_mem_breakpoints = []
            for b_lower, b_upper in mem_breakpoints:
                for t_lower, t_upper in mem_targets:
                    new_mem_breakpoints += remove_range((b_lower, b_upper),
                                                        (t_lower, t_upper))
            mem_breakpoints = list(merge(new_mem_breakpoints))
            self.print_breakpoints()
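merge(new_mem_breakpoints) is expected to coalesce the surviving (lower, upper) address ranges into a sorted, non-overlapping list before it is stored back into mem_breakpoints. A small generator sketch under that assumption (the real helper may or may not also join ranges that are merely adjacent):

def merge(ranges):
    # Coalesce overlapping or adjacent (lower, upper) pairs and yield the
    # resulting disjoint ranges in ascending order. Assumed behaviour only.
    it = iter(sorted(ranges))
    try:
        cur_lo, cur_hi = next(it)
    except StopIteration:
        return
    for lo, hi in it:
        if lo <= cur_hi + 1:
            cur_hi = max(cur_hi, hi)
        else:
            yield (cur_lo, cur_hi)
            cur_lo, cur_hi = lo, hi
    yield (cur_lo, cur_hi)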
Example #23
def merge_sort(arr):
    if len(arr) < 2:
        return arr  # already sorted

    mid_ix = len(arr) // 2

    left = merge_sort(arr[:mid_ix])
    right = merge_sort(arr[mid_ix:])
    return merge(left, right)
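This version expects merge(left, right) to return a new sorted list built from two already-sorted halves. A minimal sketch of such a helper, assuming that contract:

def merge(left, right):
    # Combine two sorted lists into one sorted list. Sketch only; the helper
    # the example actually uses is not shown here.
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1
    result.extend(left[i:])
    result.extend(right[j:])
    return result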
Example #24
  def train(self, config):
    if config.is_train:
      input_setup(self.sess, config)
    else:
      nx, ny, arr = input_setup(self.sess, config)
      print(np.shape(arr))

    if config.is_train:     
      data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "train.h5")
      train_data, train_label = read_data(data_dir,is_train=True)

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)

    tf.global_variables_initializer().run()
    
    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if config.is_train:
      print("Training...")

      for ep in range(config.epoch):
        # Run by batch images
        batch_idxs = len(train_data) // config.batch_size
        for idx in range(0, batch_idxs):
          batch_images = train_data[idx*config.batch_size : (idx+1)*config.batch_size]
          batch_labels = train_label[idx*config.batch_size : (idx+1)*config.batch_size]

          counter += 1
          _, err = self.sess.run([self.train_op, self.loss], feed_dict={self.images: batch_images, self.labels: batch_labels})

          if counter % 10 == 0:
            print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" \
              % ((ep+1), counter, time.time()-start_time, err))

          if counter % 500 == 0:
            self.save(config.checkpoint_dir, counter)

    else:
      print("Testing...")
      for i in range(len(arr)):
        image = np.zeros((nx[i]*config.stride,ny[i]*config.stride,3))
        for j in range(3):         
          result = self.pred.eval({self.images: arr[i][:,:,:,j].reshape([nx[i]*ny[i], config.image_size, config.image_size, 1])})
          result = merge(result, [nx[i], ny[i]])
          result = result.squeeze()        
          image[:, :, j] = result

        image_path = os.path.join(os.getcwd(), config.sample_dir)
        image_path = os.path.join(image_path, "test_output%03d.png"%i)
        imsave(image, image_path)
Example #25
 def narrow_docids(self, idx):
     m0 = [
         decode_array(idx[feat]) for feat in self.feats if idx.has_key(feat)
     ]
     if not m0:
         return []
     refs = merge(m0)
     locs = [(refs[i], refs[i + 1]) for i in xrange(0, len(refs), 2)]
     return locs
Example #26
    def predict(self, test_image):

        arrdata, arrCrCb, nx, ny = pre_setting(test_image)

        result = self.sess.run(self.conv3, feed_dict={self.images: arrdata})

        result = merge(result, arrCrCb, [nx, ny])

        return result
Example #27
def get_sitemap(year, collection, lastmod, options):
  """Gets a single sitemap, downloading it if the sitemap has changed.
  
  Downloads the root sitemap (year==None, collection==None), or
  the sitemap for a year (collection==None), or the sitemap for
  a particular year and collection. Pass lastmod which is the current
  modification time of the file according to its parent sitemap, which
  is how it knows to return a cached copy.
  
  Returns the sitemap parsed into a DOM.
  """
  
  # Construct the URL and the path to where to cache the file on disk.
  if year is None:
    url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
    path = "fdsys/sitemap/sitemap.xml"
  elif collection is None:
    url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
    path = "fdsys/sitemap/%s/sitemap.xml" % year
  else:
    url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
    path = "fdsys/sitemap/%s/%s.xml" % (year, collection)
    
  # Should we re-download the file?
  lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
  if options.get("cached", False):
    # If --cached is used, don't hit the network.
    force = False
  elif not lastmod:
    # No *current* lastmod date is known for this file (because it is the master
    # sitemap file, probably), so always download.
    force = True
  else:
    # If the file is out of date or --force is used, download the file.
    cache_lastmod = utils.read(lastmod_cache_file)
    force = (lastmod != cache_lastmod) or options.get("force", False)
    
  if force:
    logging.warn("Downloading: %s" % url)
    
  body = utils.download(url, path, utils.merge(options, {
    'force': force, 
    'binary': True
  }))
  
  if not body:
      raise Exception("Failed to download %s" % url)
      
  # Write the current last modified date to disk so we know the next time whether
  # we need to fetch the file.
  if lastmod and not options.get("cached", False):
    utils.write(lastmod, lastmod_cache_file)
  
  try:
    return etree.fromstring(body)
  except etree.XMLSyntaxError as e:
    raise Exception("XML syntax error in %s: %s" % (url, str(e)))
Example #28
    def save_imgs(self, epoch):
        r, c = 3, 3
        noise = np.random.normal(0, 1, (r * c, 100))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5
        #ims("images/pokemon_%d.png" % epoch,utils.merge(gen_imgs,[3,3]))
        ims('images/pokemon_%d.png'%epoch, utils.merge(gen_imgs,[3,3]))
Example #29
    def train(self, config):
        if config.is_train:
            input_setup(self.sess, config)
        else:
            nx, ny = input_setup(self.sess, config)

        if config.is_train:
            data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "train.h5")
        else:
            data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "test.h5")

        train_data, train_label = read_data(data_dir)

        # Stochastic gradient descent with the standard backpropagation
        self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)

        tf.initialize_all_variables().run()

        counter = 0
        start_time = time.time()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        if config.is_train:
            print("Training...")

            for ep in xrange(config.epoch):
                # Run by batch images
                batch_idxs = len(train_data) // config.batch_size
                for idx in xrange(0, batch_idxs):
                    batch_images = train_data[idx * config.batch_size: (idx + 1) * config.batch_size]
                    batch_labels = train_label[idx * config.batch_size: (idx + 1) * config.batch_size]

                    counter += 1
                    _, err = self.sess.run([self.train_op, self.loss],
                                           feed_dict={self.images: batch_images, self.labels: batch_labels})

                    if counter % 10 == 0:
                        print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" \
                              % ((ep + 1), counter, time.time() - start_time, err))

                    if counter % 500 == 0:
                        self.save(config.checkpoint_dir, counter)

        else:
            print("Testing...")

            result = self.pred.eval({self.images: train_data, self.labels: train_label})

            result = merge(result, [nx, ny])
            result = result.squeeze()
            image_path = os.path.join(os.getcwd(), config.sample_dir)
            image_path = os.path.join(image_path, "test_image.png")
            imsave(result, image_path)
Example #30
def mirror_file(year, collection, package_name, lastmod, granule_name,
                file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name,
                           options)
    if not path: return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get(
        "force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    for file_type in file_types:
        if file_type not in targets:
            raise Exception("Invalid file type: %s" % file_type)
        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path):
            continue  # we already have the current file
        logging.warn("Downloading: " + f_path)
        data = utils.download(
            f_url, f_path,
            utils.merge(options, {
                'xml': True,
                'force': force,
                'to_cache': False,
                'needs_content': file_type == "text" and f_path.endswith(".html"),
            }))

        if not data:
            if file_type == "pdf":
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            #       html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                text_content = unicode(html.fromstring(data).text_content())
                f.write(text_content.encode("utf8"))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)
Example #31
def fetch_vote(vote_id, options):
  logging.info("\n[%s] Fetching..." % vote_id)
  
  vote_chamber, vote_number, vote_congress, vote_session_year = utils.split_vote_id(vote_id)
  
  if vote_chamber == "h":
    url = "http://clerk.house.gov/evs/%s/roll%03d.xml" % (vote_session_year, int(vote_number))
  else:
    session_num = int(vote_session_year) - utils.get_congress_first_year(int(vote_congress)) + 1
    url = "http://www.senate.gov/legislative/LIS/roll_call_votes/vote%d%d/vote_%d_%d_%05d.xml" % (int(vote_congress), session_num, int(vote_congress), session_num, int(vote_number))
  
  # fetch vote XML page
  body = utils.download(
    url, 
    "%s/votes/%s/%s%s/%s%s.xml" % (vote_congress, vote_session_year, vote_chamber, vote_number, vote_chamber, vote_number),
    utils.merge(options, {'binary': True}),
    )

  if not body:
    return {'saved': False, 'ok': False, 'reason': "failed to download"}

  if options.get("download_only", False):
    return {'saved': False, 'ok': True, 'reason': "requested download only"}

  if "This vote was vacated" in body:
    # Vacated votes: 2011-484, 2012-327, ...
    # Remove file, since it may previously have existed with data.
    for f in (output_for_vote(vote_id, "json"), output_for_vote(vote_id, "xml")):
      if os.path.exists(f):
        os.unlink(f)
    return {'saved': False, 'ok': True, 'reason': "vote was vacated"}

  dom = etree.fromstring(body)

  vote = {
    'vote_id': vote_id,
    'chamber': vote_chamber,
    'congress': int(vote_congress),
    'session': vote_session_year,
    'number': int(vote_number),
    'updated_at': datetime.datetime.fromtimestamp(time.time()),
    'source_url': url,
  }
  
  # do the heavy lifting
  
  if vote_chamber == "h":
    parse_house_vote(dom, vote)
  elif vote_chamber == "s":
    parse_senate_vote(dom, vote)
    
  # output and return
  
  output_vote(vote, options)

  return {'ok': True, 'saved': True}
Example #32
    def train(self, config):
        err_li = []
        # NOTE: if training, nx and ny are ignored
        nx, ny = input_setup(config)

        data_dir = checkpoint_dir(config)

        input_, label_ = read_data(data_dir)
        # Stochastic gradient descent with the standard backpropagation
        #self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=config.learning_rate).minimize(self.loss)  # minimize over w and b
        tf.initialize_all_variables().run()  # start running the session

        counter = 0
        time_ = time.time()

        self.load(config.checkpoint_dir)
        # Train
        if config.is_train:
            print("Now Start Training...")
            for ep in range(config.epoch):  # total number of epochs to run
                # Run by batch images
                batch_idxs = len(input_) // config.batch_size
                for idx in range(0, batch_idxs):  # process one batch of config.batch_size images per step
                    batch_images = input_[idx * config.batch_size:(idx + 1) *
                                          config.batch_size]
                    batch_labels = label_[idx * config.batch_size:(idx + 1) *
                                          config.batch_size]
                    counter += 1
                    _, err = self.sess.run([self.train_op, self.loss],
                                           feed_dict={
                                               self.images: batch_images,
                                               self.labels: batch_labels
                                           })
                    err_li.append(err)  # feed_dict is fed into self.images and self.labels defined in the model
                    if counter % 10 == 0:
                        print(
                            "Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                            % ((ep + 1), counter, time.time() - time_, err))
                        #print(label_[1] - self.pred.eval({self.images: input_})[1],'loss:]',err)
                    if counter % 500 == 0:
                        self.save(config.checkpoint_dir, counter)
        # Test
        else:
            print("Now Start Testing...")
            #print("nx","ny",nx,ny)

            result = self.pred.eval({self.images: input_})
            #print(label_[1] - result[1])
            image = merge(result, [nx, ny], self.c_dim)

            #checkimage(image)

            imsave(image, config.result_dir + '/result.png', config)
Example #34
def get_train_test(test_start, test_stop, model='ar'):
    """Loads train and test data to datasets ... """
    # logger.info('Retrieves data')
    if model == 'ar':
        files_train = get_list_of_files_excluding_period(test_start, test_stop)
        files_test = get_list_of_files(test_start, test_stop)

    else:
        files_train = get_list_of_files_excluding_period_traditional_model(
            test_start, test_stop)
        files_test = get_list_of_files_traditional_model(test_start, test_stop)

    #logger.info('Detected the relevant files. ')
    train_dataset = merge(files_train)
    #logger.info('Merged training data for {} to {}'.format(test_start,
    #test_stop))
    test_dataset = merge(files_test)
    #logger.info('Merged test data for {} to {}'.format(test_start, test_stop))
    return train_dataset, test_dataset
Example #35
def save_samples(sess, val_z, model, dir_name, global_step, shape):
    """
    Function to save samples during training

    """
    fake_samples = sess.run(model.G, {model.z: val_z})
    fake_samples = 255 * ((fake_samples + 1.) / 2.)
    merged_samples = utils.merge(fake_samples, size=shape)
    fn = "{:0>6d}.png".format(global_step)
    scipy.misc.imsave(os.path.join(dir_name, fn), merged_samples)
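utils.merge here tiles a batch of generated samples into one image grid before saving; the SRCNN examples above that call merge(result, [nx, ny]) stitch patches back together with the same idea. A minimal NumPy sketch, assuming the batch has shape (N, H, W, C) and that the real helper behaves roughly like this:

import numpy as np

def merge(images, size):
    # Tile a batch of images (N, H, W, C) into a size[0] x size[1] grid.
    # Sketch of an assumed helper; it expects at least size[0] * size[1] images.
    h, w, c = images.shape[1], images.shape[2], images.shape[3]
    grid = np.zeros((h * size[0], w * size[1], c), dtype=images.dtype)
    for idx, image in enumerate(images[:size[0] * size[1]]):
        row, col = divmod(idx, size[1])
        grid[row * h:(row + 1) * h, col * w:(col + 1) * w, :] = image
    return grid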
Example #36
 def _add(self, ngram):
     # print ngram
     if len(ngram) == 1:
         self.unigram_vocab.add(ngram[0])
         self.unigram_counter.add(ngram)
     if len(ngram) == 2:
         self.bigram_vocab.add(utils.merge(ngram))
         self.bigram_counter.add(ngram)
     if len(ngram) == 3:
         self.trigram_vocab.add(utils.merge(ngram))
         self.trigram_counter.add(ngram)
     if len(ngram) > 2:
         key = ' '.join(ngram[:-1])
         value = ngram[-1]
         try:
             self.adj_words[key].add(value)
         except KeyError:
             self.adj_words[key] = set()
             self.adj_words[key].add(value)
Example #37
def mirror_bulkdata_file(sitemap, url, item_path, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file?
    path = "%s/fdsys/%s/%s" % (utils.data_dir(), sitemap["collection"],
                               item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if sitemap["collection"] == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(
            os.path.splitext(os.path.basename(item_path))[0],
            with_version=False)
        path = output_for_bill(bill_id,
                               FDSYS_BILLSTATUS_FILENAME,
                               is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(
        url,
        path,
        utils.merge(
            options,
            {
                'binary': True,
                'force': True,  # decision to cache was made above
                'to_cache': False,
            }))
    results.append(path)

    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)

    return results
Example #38
    def train(args):

        if args.is_train:
            input_setup(args)
        else:
            nx, ny = input_setup(args)

        counter = 0
        start_time = time.time()

        if args.is_train:
            print("Training...")
            data_dir = os.path.join('./{}'.format(args.checkpoint_dir),
                                    "train.h5")
            train_data, train_label = read_data(data_dir)

            display_step = 5
            for step in range(args.epochs):
                batch_idxs = len(train_data) // args.batch_size

                for idx in range(0, batch_idxs):

                    batch_images = train_data[idx * args.batch_size:(idx + 1) *
                                              args.batch_size]
                    batch_labels = train_label[idx *
                                               args.batch_size:(idx + 1) *
                                               args.batch_size]
                    run_optimization(batch_images, batch_labels)

                    if step % display_step == 0:
                        pred = srcnn(batch_images)
                        loss = mse(pred, batch_labels)
                        #psnr_loss = psnr(batch_labels, pred)
                        #acc = accuracy(pred, batch_y)

                        #print("step: %i, loss: %f", "psnr_loss: %f" %(step, loss, psnr_loss))
                        #print("Step:'{0}', Loss:'{1}', PSNR: '{2}'".format(step, loss, psnr_loss))

                        print("step: %i, loss: %f" % (step, loss))

        else:
            print("Testing...")
            data_dir = os.path.join('./{}'.format(args.checkpoint_dir),
                                    "test.h5")
            test_data, test_label = read_data(data_dir)

            result = srcnn(test_data)
            result = merge(result, [nx, ny])
            result = result.squeeze()

            image_path = os.path.join(os.getcwd(), args.sample_dir)
            image_path = os.path.join(image_path, "test_image.png")
            print(result.shape)
            imsave(result, image_path)
Example #39
def get_image_files(attrs):
    images = []
    directory = os.path.join(config.get('crawled', 'image_dir'), attrs)
    for dirname, subdirs, filenames in tf.gfile.Walk(directory):
        if len(subdirs) == 0:
            files = filter(lambda filename: 'jpg' in filename, filenames)
            filepaths = map(lambda file: os.path.join(dirname, file), files)
            images.append(filepaths)
    images = merge(images)
    print 'There are {} items in {}'.format(len(images), directory)
    return images
Example #40
 def narrow_docids(self, idx):
     m0 = [decode_array(idx[w]) for w in self.r0 if idx.has_key(w)]
     if self.r0 and not m0:
         return []
     m2 = [decode_array(idx[w]) for w in self.r2 if idx.has_key(w)]
     if self.r2 and not m2:
         return []
     if self.r1:
         try:
             refs = intersect(decode_array(idx[w]) for w in self.r1)
         except KeyError:
             return []
         refs = union(refs, [m for m in (m0, m2) if m])
     elif not self.r2:
         refs = merge(m0)
     else:
         refs = union(merge(m0), [m2])
     # Now: refs = [ docid1,sentid1, docid2,sentid2, ... ]
     locs = [(refs[i], refs[i + 1]) for i in xrange(0, len(refs), 2)]
     return locs
Example #41
def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
  # Where should we store the file?
  path = get_output_path(year, collection, package_name, granule_name, options)
  if not path: return # should skip
  
  # Do we need to update this record?
  lastmod_cache_file = path + "/lastmod.txt"
  cache_lastmod = utils.read(lastmod_cache_file)
  force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)
  
  # Try downloading files for each file type.
  targets = get_package_files(package_name, granule_name, path)
  updated_file_types = set()
  for file_type in file_types:
    if file_type not in targets: raise Exception("Invalid file type: %s" % file_type)
    f_url, f_path = targets[file_type]
    
    if (not force) and os.path.exists(f_path): continue # we already have the current file
    logging.warn("Downloading: " + f_path)
    data = utils.download(f_url, f_path, utils.merge(options, {
      'binary': True, 
      'force': force, 
      'to_cache': False,
      'needs_content': file_type == "text" and f_path.endswith(".html"),
    }))
    updated_file_types.add(file_type)
    
    if not data:
      if file_type == "pdf":
        # expected to be present for all packages
        raise Exception("Failed to download %s" % package_name)
      else:
        # not all packages have all file types, but assume this is OK
        logging.error("file not found: " + f_url)
        continue
    
    if file_type == "text" and f_path.endswith(".html"):
      # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
      # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
      #       html.fromstring does auto-detection.
      with open(f_path[0:-4] + "txt", "w") as f:
        text_content = unicode(html.fromstring(data).text_content())
        f.write(text_content.encode("utf8"))
        
  if collection == "BILLS" and "mods" in updated_file_types:
    # When we download bill files, also create the text-versions/data.json file
    # which extracts commonly used components of the MODS XML.
    from bill_versions import write_bill_version_metadata
    write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

  # Write the current last modified date to disk so we know the next time whether
  # we need to fetch the files for this sitemap item.
  if lastmod and not options.get("cached", False):
    utils.write(lastmod, lastmod_cache_file) 
Example #42
 def narrow_docids(self, idx):
   m0 = [ decode_array(idx[w]) for w in self.r0 if idx.has_key(w) ]
   if self.r0 and not m0:
     return []
   m2 = [ decode_array(idx[w]) for w in self.r2 if idx.has_key(w) ]
   if self.r2 and not m2:
     return []
   if self.r1:
     try:
       refs = intersect( decode_array(idx[w]) for w in self.r1 )
     except KeyError:
       return []
     refs = union(refs, [ m for m in (m0,m2) if m ])
   elif not self.r2:
     refs = merge(m0)
   else:
     refs = union(merge(m0), [m2])
   # Now: refs = [ docid1,sentid1, docid2,sentid2, ... ]
   locs = [ (refs[i], refs[i+1]) for i in xrange(0, len(refs), 2) ]
   return locs
Example #43
def generate_validation_batch(required_input_keys, required_output_keys, set="validation"):
    # generate sunny data
    sunny_length = get_lenght_of_set(name="sunny", set=set)
    regular_length = get_lenght_of_set(name="regular", set=set)

    sunny_batches = int(np.ceil(sunny_length / float(_config().sunny_batch_size)))
    regular_batches = int(np.ceil(regular_length / float(_config().batch_size)))

    if "sunny" in required_input_keys or "segmentation" in required_output_keys:
        num_batches = max(sunny_batches, regular_batches)
    else:
        num_batches = regular_batches

    num_chunks = int(np.ceil(num_batches / float(_config().batches_per_chunk)))

    sunny_chunk_size = _config().batches_per_chunk * _config().sunny_batch_size
    regular_chunk_size = _config().batches_per_chunk * _config().batch_size

    for n in xrange(num_chunks):

        result = {}
        input_keys_to_do  = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys) # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:

            indices = range(n*sunny_chunk_size, (n+1)*sunny_chunk_size)

            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = range(n*regular_chunk_size, (n+1)*regular_chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set=set,
                                       preprocess_function=_config().preprocess_validation)

        result = utils.merge(result, kaggle_data)

        yield result
Example #44
def generate_train_batch(required_input_keys, required_output_keys):
    """Creates an iterator that returns train batches."""

    sunny_chunk_size = _config().sunny_batch_size * _config().batches_per_chunk
    chunk_size = _config().batch_size * _config().batches_per_chunk

    while True:
        result = {}
        input_keys_to_do = list(required_input_keys) #clone
        output_keys_to_do = list(required_output_keys) #clone
        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = _config().rng.randint(0, len(sunny_train_images), sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = _config().rng.randint(0, len(train_patient_folders), chunk_size)  #
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set="train",
                                       preprocess_function=_config().preprocess_train)

        result = utils.merge(result, kaggle_data)

        yield result
Example #45
def fetch_version(bill_version_id, options):
  # Download MODS etc.
	
  logging.info("\n[%s] Fetching..." % bill_version_id)
  
  bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
  # bill_id = "%s%s-%s" % (bill_type, number, congress)

  utils.download(
    mods_url_for(bill_version_id), 
    document_filename_for(bill_version_id, "mods.xml"),
    utils.merge(options, {'binary': True, 'to_cache': False})
  )
  
  return write_bill_version_metadata(bill_version_id)
Example #46
def run(options):
    # Download the TSV file.
    cache_zip_path = "adler-wilkerson-bills.zip"
    utils.download(
        "http://congressionalbills.org/billfiles/bills80-92.zip",
        cache_zip_path,
        utils.merge(options, {'binary': True, 'needs_content': False}))

    # Unzip in memory and process the records.
    zfile = zipfile.ZipFile(utils.cache_dir() + "/" + cache_zip_path)
    csvreader = csv.DictReader(zfile.open("bills80-92.txt"), delimiter="\t")
    for record in csvreader:
        rec = process_bill(record)

        import pprint
        pprint.pprint(rec)
Example #47
def run(options):
  amendment_id = options.get('amendment_id', None)
  bill_id = options.get('bill_id', None)
  
  search_state = { }

  if amendment_id:
    amendment_type, number, congress = utils.split_bill_id(amendment_id)
    to_fetch = [amendment_id]

  elif bill_id:
    # first, crawl the bill
    bill_type, number, congress = utils.split_bill_id(bill_id)
    bill_status = fetch_bill(bill_id, options)
    if bill_status['ok']:
      bill = json.loads(utils.read(output_for_bill(bill_id, "json")))
      to_fetch = [x["amendment_id"] for x in bill["amendments"]]
    else:
      logging.error("Couldn't download information for that bill.")
      return None

  else:
    congress = options.get('congress', utils.current_congress())

    to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
    if not to_fetch:
      if options.get("fast", False):
        logging.warn("No amendments changed.")
      else:
        logging.error("Error figuring out which amendments to download, aborting.")

      return None

    limit = options.get('limit', None)
    if limit:
      to_fetch = to_fetch[:int(limit)]

  if options.get('pages_only', False):
    return None

  logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
  saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

  # keep record of the last state of all these amendments, for later fast-searching
  save_bill_search_state(saved_amendments, search_state)
Example #48
def mirror_bulkdata_file(collection, url, item_path, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file?
    path = "%s/govinfo/%s/%s" % (utils.data_dir(), collection, item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if collection == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(os.path.splitext(os.path.basename(item_path.replace("BILLSTATUS-", "")))[0], with_version=False)
        path = output_for_bill(bill_id, FDSYS_BILLSTATUS_FILENAME, is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(url, path, utils.merge(options, {
        'binary': True,
        'force': True, # decision to cache was made above
        'to_cache': False,
    }))
    results.append(path)

    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)

    return results
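
The lastmod bookkeeping above (compare the sitemap's lastmod against a small sidecar file, and write it back after a successful download) can be isolated like this; the helpers below are illustrative stand-ins, assuming utils.read/utils.write behave like plain file reads and writes:

import os

# Illustrative sidecar-file pattern; not the scraper's actual utils.
def is_up_to_date(path, sitemap_lastmod):
    cache_file = os.path.splitext(path)[0] + "-lastmod.txt"
    if not os.path.exists(cache_file):
        return False
    with open(cache_file) as f:
        return f.read() == sitemap_lastmod

def record_lastmod(path, sitemap_lastmod):
    cache_file = os.path.splitext(path)[0] + "-lastmod.txt"
    with open(cache_file, "w") as f:
        f.write(sitemap_lastmod)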
Example No. 49
0
    def start(self):
        """Start up the bot process

        Calls the ``connect`` method and then (if ``stream`` is set) begins the
        event loop.
        """
        login_data = self.connect()
        if not login_data:
            return None

        self.running = True
        for handler in self._hooks[events.SETUP]:
            handler(merge(login_data, {"client": self.client}))

        if self.stream:
            try:
                self.read()
            except:
                self.stop()
                raise
Example No. 50
0
  def test(self, name="test", options=None, fixed=False):
    if options is None:
      options = self.options

    t = strfnow()

    for option in options:
      if fixed:
        a, b, c, d = self.loader.tests[option]
      else:
        a, b, c, d = self.loader.next(set_option=option)

      feed = {self.a: a,
              self.b: b,
              self.c: c,
              self.d: d}

      fname = "%s/%s_option:%s_time:%s.png" % (self.sample_dir, name, option, t)
      g_img, g2_img, g3_img = self.sess.run([self.g1_img, self.g2_img, self.g3_img], feed_dict=feed)

      imsave(fname, merge(a, b, c, d, g_img, g2_img, g3_img))
Example No. 51
0
def parse_data(url):
    r = requests.get(url, headers=utils.merge(DEFAULT_HEADERS, {}))
    if r.status_code != 200:
        return None

    soup = BeautifulSoup(r.text, "html.parser")

    full_data = {}
    for t in soup.select('table'):
        section = t.select('th')[0].contents[0]
        h = [get_contents(e.contents) for e in t.select('.ttl > a')]
        c = [get_contents(e.contents) for e in t.select('.nfo')]
        full_data[section] = dict(zip(h, c))

    new_data = {}
    for key, val in full_data.items():
        for subk, subv in val.items():
            new_data["%s:%s" % (key, subk)] = subv
            #print json.dumps({"%s:%s" % (key, subk): subv})

    return new_data
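
The flattening loop at the end is the notable step: nested section/key pairs are collapsed into single "Section:Key" keys. The same loop run on made-up sample data:

# Made-up sample input; only the flattening step from parse_data is shown.
full_data = {
    "Display": {"Type": "IPS LCD", "Size": "6.1 inches"},
    "Battery": {"Capacity": "4000 mAh"},
}
new_data = {}
for key, val in full_data.items():
    for subk, subv in val.items():
        new_data["%s:%s" % (key, subk)] = subv
# new_data == {'Display:Type': 'IPS LCD', 'Display:Size': '6.1 inches', 'Battery:Capacity': '4000 mAh'}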
Example No. 52
0
def get_sitemap(year, collection, lastmod, options):
    # Construct the URL and the path to where to cache the file on disk.
    if year is None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
        path = "fdsys/sitemap/sitemap.xml"
    elif collection is None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
        path = "fdsys/sitemap/%s/sitemap.xml" % year
    else:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
        path = "fdsys/sitemap/%s/%s.xml" % (year, collection)

    # Should we re-download the file?
    lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
    if options.get("cached", False):
        # If --cached is used, don't hit the network.
        force = False
    elif not lastmod:
        # No *current* lastmod date is known for this file (because it is the master
        # sitemap file, probably), so always download.
        force = True
    else:
        # If the file is out of date or --force is used, download the file.
        cache_lastmod = utils.read(lastmod_cache_file)
        force = (lastmod != cache_lastmod) or options.get("force", False)

    if force:
        logging.warn("Downloading: %s" % url)

    body = utils.download(url, path, utils.merge(options, {"force": force, "xml": True}))

    if not body:
        raise Exception("Failed to download %s" % url)

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the file.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

    return etree.fromstring(body)
Example No. 53
0
def mirror_package_zipfile(collection, package_name, file_path, lastmod, lastmod_cache, options):
    # Do we already have this file updated?
    if lastmod_cache.get("package") == lastmod:
        if not options.get("force", False):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(file_path) and options.get("cached", False):
        return

    # Download.
    file_url = GOVINFO_BASE_URL + "content/pkg/{}-{}.zip".format(collection, package_name)
    logging.warn("Downloading: " + file_path)
    data = utils.download(file_url, file_path, utils.merge(options, {
        'binary': True,
        'force': True, # decision to cache was made above
        'to_cache': False,
        'needs_content': False,
    }))

    # Update the lastmod of the downloaded file.
    lastmod_cache['package'] = lastmod
    return True
Example No. 54
0
def update_sitemap2(url, current_lastmod, how_we_got_here, options, lastmod_cache, cache_file):
    # Return a list of files we downloaded.
    results = []

    # Download anew if the current_lastmod doesn't match the stored lastmod
    # in our cache and --cached is not specified, or if --force is given.
    # If we're not downloading it, load it from disk anyway, because we still
    # have to process each sitemap to ensure we've downloaded all of the
    # package files the user wants.
    download = should_download_sitemap(lastmod_cache.get("lastmod"), current_lastmod, options)

    # Download, or just retrieve from cache.
    if download:
        logging.warn("Downloading: %s" % url)
    body = utils.download(
        url,
        cache_file,
        utils.merge(options, {
            'force': download,
            'binary': True
        }))
    if not body:
        raise Exception("Failed to download %s" % url)

    # If we downloaded a new file, update the lastmod for our cache.
    if download and current_lastmod:
        lastmod_cache["lastmod"] = current_lastmod

    # Load the XML.
    try:
        sitemap = etree.fromstring(body)
    except etree.XMLSyntaxError as e:
        raise Exception("XML syntax error in %s: %s" % (url, str(e)))

    # Process the entries.
    if sitemap.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}sitemapindex":

        # This is a sitemap index. Process the sitemaps listed in this
        # sitemapindex recursively.
        for node in sitemap.xpath("x:sitemap", namespaces=ns):
            # Get URL and lastmod date of the sitemap.
            url = str(node.xpath("string(x:loc)", namespaces=ns))
            lastmod = str(node.xpath("string(x:lastmod)", namespaces=ns))
            sitemap_results = update_sitemap(url, lastmod, how_we_got_here, options)
            if sitemap_results is not None:
                results = results + sitemap_results

    elif sitemap.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset":

        # This is a regular sitemap with content items listed.

        # Process the items.
        for node in sitemap.xpath("x:url", namespaces=ns):
            url = str(node.xpath("string(x:loc)", namespaces=ns))
            lastmod = str(node.xpath("string(x:lastmod)", namespaces=ns))

            m = re.match(COLLECTION_BASE_URL + r"([^-]+)-(.*)", url)
            if m:
                collection = m.group(1)
                package_name = m.group(2)
                if options.get("filter") and not re.search(options["filter"], package_name): continue
                try:
                    mirror_results = mirror_package(collection, package_name, lastmod, lastmod_cache.setdefault("packages", {}), options)
                except:
                    logging.exception("Error fetching package {} in collection {} from {}.".format(package_name, collection, url))
                    mirror_results = []
                results.extend(mirror_results)

            else:
                # This is a bulk data item. Extract components of the URL.
                m = re.match(BULKDATA_BASE_URL + r"([^/]+)/(.*)", url)
                if not m:
                    raise Exception("Unmatched bulk data file URL (%s) at %s." % (url, "->".join(how_we_got_here)))
                collection = m.group(1)
                item_path = m.group(2)
                if options.get("filter") and not re.search(options["filter"], item_path): continue
                try:
                    mirror_results = mirror_bulkdata_file(collection, url, item_path, lastmod, options)
                except:
                    logging.exception("Error fetching file {} in collection {} from {}.".format(item_path, collection, url))
                    mirror_results = None
                if mirror_results is not None and len(mirror_results) > 0:
                    results = results + mirror_results

    else:
        raise Exception("Unknown sitemap type (%s) at the root sitemap of %s." % (sitemap.tag, url))

    return results
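
The sitemapindex/urlset branching above can be exercised on a tiny inline document; the URL below is invented, and ns is assumed to map "x" to the sitemap namespace as in the code above:

# Self-contained illustration of the urlset branch (invented URL).
from lxml import etree

ns = {"x": "http://www.sitemaps.org/schemas/sitemap/0.9"}
body = b"""<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url><loc>https://example.gov/pkg/BILLS-117hr1ih</loc>
       <lastmod>2021-01-04</lastmod></url>
</urlset>"""
sitemap = etree.fromstring(body)
if sitemap.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset":
    for node in sitemap.xpath("x:url", namespaces=ns):
        print(node.xpath("string(x:loc)", namespaces=ns))
        print(node.xpath("string(x:lastmod)", namespaces=ns))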
Example No. 55
0
def mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file? Each collection has a different
    # file system layout (for BILLS, we put bill text along where the
    # bills scraper puts bills).
    path = get_output_path(sitemap, package_name, granule_name, options)
    if not path:
        return  # should skip

    # Get the lastmod times of the files previously saved for this package.
    file_lastmod_changed = False
    file_lastmod = { }
    lastmod_cache_file = path + "/lastmod.json"
    if os.path.exists(lastmod_cache_file):
        file_lastmod = json.load(open(lastmod_cache_file))

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name)
    for file_type, (file_url, relpath) in targets.items():
        # Does the user want to save this file type? If the user didn't
        # specify --store, save everything. Otherwise only save the
        # file types asked for.
        if options.get("store", "") and file_type not in options["store"].split(","):
            continue

        # Do we already have this file updated? The file_lastmod JSON
        # stores the lastmod from the sitemap at the time we downloaded
        # the individual file.
        if file_lastmod.get(file_type) == lastmod:
            if not options.get("force", False):
                continue

        # With --cached, skip if the file is already downloaded.
        file_path = os.path.join(path, relpath)
        if os.path.exists(file_path) and options.get("cached", False):
            continue

        # Download.
        logging.warn("Downloading: " + file_path)
        data = utils.download(file_url, file_path, utils.merge(options, {
            'binary': True,
            'force': True, # decision to cache was made above
            'to_cache': False,
            'return_status_code_on_error': True,
            'needs_content': (file_type == "text" and file_path.endswith(".html")),
        }))
        results.append(file_path)

        # Download failed?
        if data == 404:
            # Not all packages have all file types. Just check the ones we know
            # must be there.
            if file_type in ("pdf", "zip"):
                # expected to be present for all packages
                raise Exception("Failed to download %s %s (404)" % (package_name, file_type))
            elif sitemap["collection"] == "BILLS" and file_type in ("text", "mods"):
                # expected to be present for bills
                raise Exception("Failed to download %s %s (404)" % (package_name, file_type))
        elif data is True:
            # Download was successful, but needs_content was False, so instead of the
            # file content we got back True. Because bool is a subclass of int,
            # isinstance(True, int) is True, so we have to test for True separately
            # from testing whether we got a status-code integer.
            pass
        elif not data or isinstance(data, int):
            # There was some other error - skip the rest. Don't
            # update file_lastmod!
            continue

        # Update the lastmod of the downloaded file. If the download failed
        # because of a 404, we still update this to indicate that the file
        # definitively does not exist, and we won't try fetching it again.
        file_lastmod[file_type] = lastmod
        file_lastmod_changed = True

        # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
        # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
        #       html.fromstring does auto-detection.
        if file_type == "text" and file_path.endswith(".html"):
            file_path_text = file_path[0:-4] + "txt"
            logging.info("Unwrapping HTML to: " + file_path_text)
            with open(file_path_text, "w") as f:
                f.write(unwrap_text_in_html(data))

        if sitemap["collection"] == "BILLS" and file_type == "mods":
            # When we download bill files, also create the text-versions/data.json file
            # which extracts commonly used components of the MODS XML, whenever we update
            # that MODS file.
            extract_bill_version_metadata(package_name, path)

    # Write the current last modified date back to disk so we know next time
    # whether we need to fetch the files for this sitemap item. If nothing new
    # was fetched, there is no reason to update the file.
    if file_lastmod and file_lastmod_changed:
        utils.write(json.dumps(file_lastmod), lastmod_cache_file)

    return results
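
The comment about isinstance(True, int) points at a genuine Python quirk: bool is a subclass of int, so a success flag and an HTTP status code look alike to isinstance, which is why the True case is tested first:

# bool is a subclass of int, so test `data is True` before the int check.
data = True
print(isinstance(data, int))  # True -- bool counts as an int
print(data is True)           # True -- so this branch must come first
data = 404
print(isinstance(data, int))  # True
print(data is True)           # False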
Example No. 56
0
def mirror_package(year, collection, package_name, lastmod, granule_name, file_types, options):
  # Where should we store the file?
  path = get_output_path(year, collection, package_name, granule_name, options)
  if not path: return # should skip
  
  # Do we need to update this record?
  lastmod_cache_file = path + "/lastmod.txt"
  cache_lastmod = utils.read(lastmod_cache_file)
  force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)
  
  # Try downloading files for each file type.
  targets = get_package_files(package_name, granule_name, path)
  updated_file_types = set()
  for file_type in file_types:
    if file_type not in targets: raise Exception("Invalid file type: %s" % file_type)
    
    # For BILLS, XML was not available until the 108th Congress, though even after that
    # it was spotty until the 111th or so Congress.
    if file_type == "xml" and collection == "BILLS" and int(package_name[6:9]) < 108:
      continue
    
    f_url, f_path = targets[file_type]
    
    if (not force) and os.path.exists(f_path): continue # we already have the current file
    logging.warn("Downloading: " + f_path)
    data = utils.download(f_url, f_path, utils.merge(options, {
      'binary': True, 
      'force': force, 
      'to_cache': False,
      'needs_content': file_type == "text" and f_path.endswith(".html"),
    }))
    updated_file_types.add(file_type)
    
    if not data:
      if file_type in ("pdf", "zip"):
        # expected to be present for all packages
        raise Exception("Failed to download %s" % package_name)
      else:
        # not all packages have all file types, but assume this is OK
        logging.error("file not found: " + f_url)
        continue
    
    if file_type == "text" and f_path.endswith(".html"):
      # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
      # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
      #       html.fromstring does auto-detection.
      with open(f_path[0:-4] + "txt", "w") as f:
        f.write(unwrap_text_in_html(data))

    if file_type == "zip":
      # This is the entire package in a ZIP file. Extract the contents of this file
      # to the appropriate paths.
      with zipfile.ZipFile(f_path) as zf:
        for z2 in zf.namelist():
          if not z2.startswith(package_name + "/"): raise ValueError("Unmatched file name in package ZIP: " + z2)
          z2 = z2[len(package_name)+1:] # strip off leading package name

          if z2 in ("mods.xml", "premis.xml", "dip.xml"):
            # Extract this file to a file of the same name.
            z3 = path + "/" + z2
          elif z2 == "pdf/" + package_name + ".pdf":
            # Extract this file to "document.pdf".
            z3 = path + "/document.pdf"
          elif z2 == "html/" + package_name + ".htm":
            # Extract this file and unwrap text to "document.txt".
            z3 = path + "/document.txt"
          else:
            raise ValueError("Unmatched file name in package ZIP: " + z2)

          with zf.open(package_name + "/" + z2) as zff:
            with open(z3, "w") as output_file:
              data = zff.read()
              if z3 == path + "/document.txt": data = unwrap_text_in_html(data)
              output_file.write(data)
        
  if collection == "BILLS" and "mods" in updated_file_types:
    # When we download bill files, also create the text-versions/data.json file
    # which extracts commonly used components of the MODS XML.
    from bill_versions import write_bill_version_metadata
    write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

  # Write the current last modified date to disk so we know the next time whether
  # we need to fetch the files for this sitemap item.
  if lastmod and not options.get("cached", False):
    utils.write(lastmod, lastmod_cache_file) 
Example No. 57
0
def mirror_packages(fetch_collections, options):
  """Create a local mirror of FDSys document files. Only downloads
  changed files, according to the sitemap. Run update_sitemap_cache first.
  
  Pass fetch_collections as None, or as a set of collection names to
  restrict the update to particular FDSys collections.
  
  Set options["store"] to a comma-separated list of file types (pdf,
  mods, text, xml, zip).
  """
  
  # For determining whether we need to process a sitemap file again on a later
  # run, we need to make a key out of the command line arguments that affect
  # which files we are downloading.
  cache_options_key = repr(tuple(sorted(kv for kv in options.items() if kv[0] in ("store", "year", "congress", "granules", "cached"))))
  
  file_types = options["store"].split(",")

  # Process each FDSys sitemap...
  for sitemap in sorted(glob.glob(utils.cache_dir() + "/fdsys/sitemap/*/*.xml")):
    # Should we process this file?
    year, collection = re.search(r"/(\d+)/([^/]+).xml$", sitemap).groups()
    if "year" in options and year != options["year"]: continue
    if "congress" in options and int(year) not in utils.get_congress_years(int(options["congress"])): continue 
    if fetch_collections and collection not in fetch_collections: continue
    
    # Has this sitemap changed since the last successful mirror?
    #
    # The sitemap's last modification time is stored in ...-lastmod.txt,
    # which comes from the sitemap's parent sitemap's lastmod listing for
    # the file.
    #
    # Compare that to the lastmod value of when we last did a successful mirror.
    # This function can be run to fetch different sets of files, so get the
    # lastmod value corresponding to the current run arguments.
    sitemap_store_state_file = re.sub(r"\.xml$", "-store-state.json", sitemap)
    sitemap_last_mod = open(re.sub(r"\.xml$", "-lastmod.txt", sitemap)).read()
    if os.path.exists(sitemap_store_state_file):
      sitemap_store_state = json.load(open(sitemap_store_state_file))
      if sitemap_store_state.get(cache_options_key) == sitemap_last_mod:
        # sitemap hasn't changed since the last time
        continue
    
    logging.info("scanning " + sitemap + "...")
    
    # Load the sitemap for this year & collection, and loop through each document.
    for package_name, lastmod in get_sitemap_entries(sitemap):
      # Add this package to the download list.
      file_list = []
      
      if not options.get("granules", False):
        # Doing top-level package files (granule==None).
        file_list.append(None)

      else:
        # In some collections, like STATUTE, each document has subparts which are not
        # described in the sitemap. Load the main HTML page and scrape for the sub-files.
        # In the STATUTE collection, the MODS information in granules is redundant with
        # information in the top-level package MODS file. But the only way to get granule-
        # level PDFs is to go through the granules.
        content_detail_url = "http://www.gpo.gov/fdsys/pkg/%s/content-detail.html" % package_name
        content_index = utils.download(content_detail_url,
            "fdsys/package/%s/%s/%s.html" % (year, collection, package_name),
            utils.merge(options, {
            'binary': True, 
          }))
        if not content_index: raise Exception("Failed to download %s" % content_detail_url)
        for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"):
          if link.text == "More":
            m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href"))
            if not m or m.group(1) != package_name: raise Exception("Unmatched granule URL %s" % link.get("href"))
            granule_name = m.group(2)
            file_list.append(granule_name)
        
      # Download the files of the desired types.
      for granule_name in file_list:
        mirror_package(year, collection, package_name, lastmod, granule_name, file_types, options)
        
    # If we got this far, we successfully downloaded all of the files in this year/collection.
    # To speed up future updates, save the lastmod time of this sitemap in a file indicating
    # what we downloaded. The store-state file contains a JSON mapping of command line options
    # to the most recent lastmod value for this sitemap.
    sitemap_store_state = { }
    if os.path.exists(sitemap_store_state_file):
      sitemap_store_state = json.load(open(sitemap_store_state_file))
    sitemap_store_state[cache_options_key] = sitemap_last_mod
    json.dump(sitemap_store_state, open(sitemap_store_state_file, "w"))
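
The cache_options_key above keys the saved lastmod by only those command-line options that affect which files are downloaded, so a later run with different flags is not wrongly skipped. The same idea as a standalone helper (the function name here is hypothetical):

# Sketch of the options-key idea; only flags that change what gets
# downloaded participate in the key, so e.g. --force is ignored.
def make_cache_options_key(options,
                           relevant=("store", "year", "congress", "granules", "cached")):
    return repr(tuple(sorted(kv for kv in options.items() if kv[0] in relevant)))

print(make_cache_options_key({"store": "mods,pdf", "force": True}))
# "(('store', 'mods,pdf'),)"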
Example No. 58
0
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)

    with tf.Session() as sess:
        if FLAGS.dataset == 'mnist':
            dcgan = DCGAN(sess, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size, y_dim=10,
                    dataset_name=FLAGS.dataset, is_crop=FLAGS.is_crop, checkpoint_dir=FLAGS.checkpoint_dir)
        else:
            dcgan = DCGAN(sess, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size,
                    dataset_name=FLAGS.dataset, is_crop=FLAGS.is_crop, checkpoint_dir=FLAGS.checkpoint_dir)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        to_json("./web/js/layers.js", [dcgan.h0_w, dcgan.h0_b, dcgan.g_bn0],
                                      [dcgan.h1_w, dcgan.h1_b, dcgan.g_bn1],
                                      [dcgan.h2_w, dcgan.h2_b, dcgan.g_bn2],
                                      [dcgan.h3_w, dcgan.h3_b, dcgan.g_bn3],
                                      [dcgan.h4_w, dcgan.h4_b, None])

        # Below is code for visualization.
        OPTION = 2
        if OPTION == 0:
          z_sample = np.random.uniform(-0.5, 0.5, size=(FLAGS.batch_size, dcgan.z_dim))
          samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
          save_images(samples, [8, 8], './samples/test_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        elif OPTION == 1:
          values = np.arange(0, 1, 1./FLAGS.batch_size)
          for idx in xrange(100):
            print(" [*] %d" % idx)
            z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim])
            for kdx, z in enumerate(z_sample):
              z[idx] = values[kdx]

            samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
            save_images(samples, [8, 8], './samples/test_arange_%s.png' % (idx))
        elif OPTION == 2:
          values = np.arange(0, 1, 1./FLAGS.batch_size)
          for idx in [random.randint(0, 99) for _ in xrange(100)]:
            print(" [*] %d" % idx)
            z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim))
            z_sample = np.tile(z, (FLAGS.batch_size, 1))
            #z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim])
            for kdx, z in enumerate(z_sample):
              z[idx] = values[kdx]

            samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
            make_gif(samples, './samples/test_gif_%s.gif' % (idx))
        elif OPTION == 3:
          values = np.arange(0, 1, 1./FLAGS.batch_size)
          for idx in xrange(100):
            print(" [*] %d" % idx)
            z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim])
            for kdx, z in enumerate(z_sample):
              z[idx] = values[kdx]

            samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
            make_gif(samples, './samples/test_gif_%s.gif' % (idx))
        elif OPTION == 4:
          image_set = []
          values = np.arange(0, 1, 1./FLAGS.batch_size)

          for idx in xrange(100):
            print(" [*] %d" % idx)
            z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim])
            for kdx, z in enumerate(z_sample): z[idx] = values[kdx]

            image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}))
            make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx))

          new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) for idx in range(64) + range(63, -1, -1)]
          make_gif(new_image_set, './samples/test_gif_merged.gif', duration=8)
        elif OPTION == 5:
          image_set = []
          values = np.arange(0, 1, 1./FLAGS.batch_size)
          z_idx = [[random.randint(0,99) for _ in xrange(5)] for _ in xrange(200)]

          for idx in xrange(200):
            print(" [*] %d" % idx)
            #z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim])
            z = np.random.uniform(-1e-1, 1e-1, size=(dcgan.z_dim))
            z_sample = np.tile(z, (FLAGS.batch_size, 1))

            for kdx, z in enumerate(z_sample):
              for jdx in xrange(5):
                z_sample[kdx][z_idx[idx][jdx]] = values[kdx]

            image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}))
            make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx))

          new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 20]) for idx in range(64) + range(63, -1, -1)]
          make_gif(new_image_set, './samples/test_gif_random_merged.gif', duration=4)
        elif OPTION == 6:
          image_set = []

          values = np.arange(0, 1, 1.0/FLAGS.batch_size).tolist()
          z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(100)]

          for idx in xrange(100):
            print(" [*] %d" % idx)
            z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim))
            z_sample = np.tile(z, (FLAGS.batch_size, 1))

            for kdx, z in enumerate(z_sample):
              for jdx in xrange(10):
                z_sample[kdx][z_idx[idx][jdx]] = values[kdx]

            image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}))
            save_images(image_set[-1], [8, 8], './samples/test_random_arange_%s.png' % (idx))

          new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) for idx in range(64) + range(63, -1, -1)]
          make_gif(new_image_set, './samples/test_gif_merged_random.gif', duration=4)
        elif OPTION == 7:
          for _ in xrange(50):
            z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(8)]

            zs = []
            for idx in xrange(8):
              z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim))
              zs.append(np.tile(z, (8, 1)))

            z_sample = np.concatenate(zs)
            values = np.arange(0, 1, 1/8.)

            for idx in xrange(FLAGS.batch_size):
              for jdx in xrange(8):
                z_sample[idx][z_idx[idx/8][jdx]] = values[idx%8]

            samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
            save_images(samples, [8, 8], './samples/multiple_testt_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        elif OPTION == 8:
          counter = 0
          for _ in xrange(50):
            import scipy.misc
            z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(8)]

            zs = []
            for idx in xrange(8):
              z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim))
              zs.append(np.tile(z, (8, 1)))

            z_sample = np.concatenate(zs)
            values = np.arange(0, 1, 1/8.)

            for idx in xrange(FLAGS.batch_size):
              for jdx in xrange(8):
                z_sample[idx][z_idx[idx/8][jdx]] = values[idx%8]

            samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})
            for sample in samples:
              scipy.misc.imsave('./samples/turing/%s.png' % counter, sample)
              counter += 1
        else:
          import scipy.misc
          from glob import glob

          samples = []
          fnames = glob("/Users/carpedm20/Downloads/x/1/*.png")
          fnames = sorted(fnames, key = lambda x: int(x.split("_")[1]) * 10000 + int(x.split('_')[2].split(".")[0]))
          for f in fnames:
            samples.append(scipy.misc.imread(f))
          make_gif(samples, './samples/training.gif', duration=8, true_image=True)
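
In this example, merge is presumably the image-grid helper common in DCGAN codebases: it tiles a batch of samples into a single [rows, cols] mosaic. A minimal numpy sketch under that assumption (not necessarily the project's own utils):

import numpy as np

# Assumed behaviour of merge(images, size): tile a (n, h, w, c) batch
# into a single (rows*h, cols*w, c) grid image.
def merge(images, size):
    rows, cols = size
    _, h, w, c = images.shape
    grid = np.zeros((rows * h, cols * w, c), dtype=images.dtype)
    for idx, img in enumerate(images[:rows * cols]):
        r, col = divmod(idx, cols)
        grid[r * h:(r + 1) * h, col * w:(col + 1) * w] = img
    return grid

samples = np.random.rand(100, 28, 28, 1)
print(merge(samples, [10, 10]).shape)  # (280, 280, 1)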
Example No. 59
0
def getappinfo(steamhome, logindata):

	librarydirs = {}
	librarydirs[steamhome["path"]] = True
	try:
		for steamapp in utils.steamapps:
			libraryvdf = utils.vdf.load(open(os.path.join(steamhome["path"],steamapp,"libraryfolders.vdf")))
			if libraryvdf is not None and "LibraryFolders" in libraryvdf:
				for folder in libraryvdf["LibraryFolders"]:
					if folder.isdigit():  # numbered entries hold the extra library paths
						librarydirs[libraryvdf["LibraryFolders"][folder]] = True
	except:
		pass

	def checkinstalled(direxe):
		# Check every library folder before giving up.
		for lib in librarydirs:
			for steamapp in utils.steamapps:
				exe = os.path.join(lib, steamapp, "common", direxe)
				if os.path.exists(exe):
					return True
		return None

	appinfo = {}
	try:
		appinfofile = open(os.path.join(steamhome["path"],'appcache','appinfo.vdf'), 'rb')
		appinfodata = appinfofile.read().decode("ISO-8859-1")
		appinfofile.close()

		games = re.compile("\x02\x00common").split(appinfodata)
		appmatch = re.compile("\x02gameid\x00([\s\S]{3})")
		fsplit = re.compile("\x00[\x01\x02]")
		esplit = re.compile("(?=\x00\x01executable(?=\x00))")

		for game in games:
			gameid =  appmatch.search(game)
			if gameid and gameid.group(1):
				appid = utils.getnum(gameid.group(1))
				fielddict = {}
				fields = game.split(chr(1))
				for field in fields:
					vals = field.split(chr(0))
					if vals is not None and vals[0] is not None and vals[0] not in fielddict:
						try:
							fielddict[vals[0]] = vals[1]
						except:
							pass # probably an invalid key - ignore it
				appinfo[appid] = {
					"name": fielddict["name"].encode("ISO-8859-1"),
					"type": ""
				}
				if "installdir" in fielddict:
					appinfo[appid]["path"] = fielddict["installdir"]
				if "dlcappid" in fielddict:
					appinfo[appid]["isdlc"] = True
				if "type" in fielddict:
					appinfo[appid]["type"] = fielddict["type"]
					if fielddict["type"].upper() == "DLC":
						appinfo[appid]["isdlc"] = True
				if "logo" in fielddict:
					appinfo[appid]["logo"] = "http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/" + str(appid) + "/" + fielddict["logo"] + ".jpg"
				if "installdir" in fielddict:
					exes = utils.zerowidthsplit(esplit, game)
					exes.pop(0) # discard the leading bit
					for e in exes:
						exedata={}
						exefields = fsplit.split(e)
						for ef in exefields:
							exevals = ef.split(chr(0))
							if exevals[0] is not None and exevals[0] not in exedata:
								try:
									exedata[exevals[0]] = exevals[1]
								except:
									pass # probably an invalid key - ignore it
						if ("oslist" not in exedata or exedata["oslist"] == steamhome["platform"]) and ("osarch" not in exedata or exedata["osarch"] == steamhome["platarch"]):
							if os.path.isabs(exedata["executable"]):
								exedata["executable"] = exedata["executable"][1:]
							appinfo[appid]["exe"] = exedata["executable"]
							if "installdir" in fielddict and "executable" in exedata:
								appinfo[appid]["foundat"] = os.path.join(fielddict["installdir"],exedata["executable"])
							if "CheckGuid" in fielddict:
								appinfo[appid]["guid"] = fielddict["CheckGuid"]
							if "checkguids" in fielddict:
								appinfo[appid]["guids"] = fielddict["checkguids"]
							break
	except:
		pass

	packageinfo = {}
	try:
		packageinfofile = open(os.path.join(steamhome["path"],'appcache','packageinfo.vdf'), 'rb')
		packageinfodata = packageinfofile.read().decode("ISO-8859-1")
		packageinfofile.close()

		pmatch = re.compile("PackageID\x00([\s\S]{4})\x02")
		ematch = re.compile("\x02ExpiryTime\x00([\s\S]{4})")
		amatch = re.compile("\x02.*?\x00([\s\S]{3})\x00")
		packages = utils.zerowidthsplit(re.compile("(?=\x00appids)"),packageinfodata)

		for package in packages:
			pkgmatch = pmatch.search(package)
			if pkgmatch is not None:
				pkgid = utils.getnum(pkgmatch.group(1))
				packageinfo[pkgid] = {"name": "unknown", "appids": {}}
				try:
					expmatch = ematch.search(package)
					if expmatch is not None:
						exptime = int(utils.getnum(expmatch.group(1)))
						if exptime is not None and exptime < time.time():
							packageinfo[pkgid]["expired"] = True
				except (RuntimeError, TypeError, NameError):
					pass
				appidkey = package[8:]
				while appidkey[0:1] == chr(2):
					appmatch = amatch.match(appidkey)
					if appmatch is None:
						break  # avoid an infinite loop on an unexpected byte layout
					appid = utils.getnum(appmatch.group(1))
					packageinfo[pkgid]["appids"][appid] = True
					appidkey = appidkey[len(appmatch.group(0)):]
	except:
		pass

	gamedb = {}
	for package in logindata["configdata"]["UserLocalConfigStore"]["Licenses"]:
		if package in packageinfo and "expired" not in packageinfo[package] and "appids" in packageinfo[package]:
			for appid in packageinfo[package]["appids"]:
				if appid in appinfo:
					if appid not in gamedb:
						gamedb[appid] = {
							"name": appinfo[appid]["name"]
						}
					if "logo" in appinfo[appid]:
						gamedb[appid]["gridimage"] = appinfo[appid]["logo"]
					if "packageid" not in gamedb[appid] or int(gamedb[appid]["packageid"]) < int(package):
						gamedb[appid]["data-packageid"] = package

	gamelist = {}
	conf = {}
	if "apps" in logindata["configdata"]["UserLocalConfigStore"]["Software"]["Valve"]["Steam"]:
		conf = logindata["configdata"]["UserLocalConfigStore"]["Software"]["Valve"]["Steam"]["apps"]
	for game in gamedb:
		if "type" in appinfo[game]:
			gametype = appinfo[game]["type"]
		if ("data-packageid" not in gamedb[game] or int(gamedb[game]["data-packageid"]) > 0) and (gametype == "" or gametype.upper() == "GAME" or gametype.upper() == "DLC"):
			confdata = {"LastPlayed": 0}
			if game in conf:
				confdata = utils.merge(confdata,conf[game])
			tags = ""
			if "tags" in confdata:
				for tag in confdata["tags"]:
					tags += "::" + confdata["tags"][tag]
			if "hidden" in confdata:
				tags += "::hidden"
			dataattrs = {
			    "data-categories": tags
			}
			if "LastPlayed" in confdata and int(confdata["LastPlayed"]) >= 1200000000:
				dataattrs["data-lastplayed"] = confdata["LastPlayed"]
			if game in appinfo:
				if "foundat" in appinfo[game] and appinfo[game]["foundat"] is not None and checkinstalled(appinfo[game]["foundat"]):
					dataattrs["data-exe"] = appinfo[game]["exe"]
					dataguids = ""
					if "guid" in appinfo[game]:
						dataguids = appinfo[game]["guid"]
					if "guids" in appinfo[game]:
						if dataguids:
							dataguids += ";"
						dataguids += appinfo[game]["guids"]
					if dataguids != "":
						dataattrs["data-guids"] = dataguids
				if "isdlc" in appinfo[game]:
					dataattrs["data-isdlc"] = True
			gamelist[game] = utils.merge(gamedb[game],dataattrs)

	return gamelist
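
The dicts merged above (e.g. conf[game], which can carry a nested "tags" table) are sometimes nested, so a deep merge may be what utils.merge actually performs. A hypothetical recursive variant of the shallow sketch shown earlier:

# Hypothetical deep-merge variant: nested dicts are merged key by key
# instead of being replaced wholesale.
def deep_merge(base, overrides):
    result = dict(base)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            result[key] = deep_merge(result[key], value)  # recurse into sub-dicts
        else:
            result[key] = value
    return result

print(deep_merge({"tags": {"0": "favorite"}, "LastPlayed": 0},
                 {"tags": {"1": "rpg"}, "LastPlayed": 1300000000}))
# {'tags': {'0': 'favorite', '1': 'rpg'}, 'LastPlayed': 1300000000}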