def do_pickle(sol_id, all_traces, all_outputs, testcases, dest_dir):
    """
    Pickle the traces, outputs, and testcases. Cleans up after errors.

    Not sure why this is a separate function instead of just part of
    execute_and_pickle.
    """
    to_pickle = {
        'traces': all_traces,
        'outputs': all_outputs,
        'testcases': testcases
    }

    # Dump out
    pickle_path = path.join(dest_dir, sol_id + '.pickle')

    try:
        with open(pickle_path, 'w') as f:
            pickle.dump(to_pickle, f)
    except (pickle.PicklingError, TypeError):
        # If something goes wrong, clean up, then pass the exception back up
        # the stack
        print 'failed to pickle sol', sol_id
        os.remove(pickle_path)
        raise
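
The clean-up-on-failure pattern above generalizes: write the pickle, and if pickling raises, delete the partially written file before re-raising so no corrupt artifact is left on disk. A minimal standalone sketch (the safe_pickle name and sample payload are illustrative, not from the original project):

import os
import pickle

def safe_pickle(obj, pickle_path):
    """Dump obj to pickle_path; remove the partial file if pickling fails."""
    try:
        with open(pickle_path, 'wb') as f:
            pickle.dump(obj, f)
    except (pickle.PicklingError, TypeError):
        # A half-written pickle is worse than none: remove it, then re-raise.
        if os.path.exists(pickle_path):
            os.remove(pickle_path)
        raise

safe_pickle({'traces': [], 'outputs': [], 'testcases': []}, 'sol_0.pickle')
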
Example #2
def savefile(path, tagdata):
    """Saves tagdata to file at path."""
    fp = open(path + ".new", "w")
    pickle.dump(tagdata, fp)
    fp.close()

    shutil.move(path + ".new", path)
Example #3
  def process_file(self, cvs_file_items):
    marks = {}
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if not isinstance(cvs_rev, CVSRevisionDelete):
          mark = self._mark_generator.gen_id()
          cvs_rev.revision_reader_token = mark
          marks[cvs_rev.rev] = mark

    if marks:
      # A separate pickler is used for each dump(), so that its memo
      # doesn't grow very large.  The default ASCII protocol is used so
      # that this works without changes on systems that distinguish
      # between text and binary files.
      pickle.dump((cvs_file_items.cvs_file.rcs_path, marks), self._pipe.stdin)
      self._pipe.stdin.flush()

    # Now that all CVSRevisions' revision_reader_tokens are set,
    # iterate through symbols and set their tokens to those of their
    # original source revisions:
    for lod_items in cvs_file_items.iter_lods():
      if lod_items.cvs_branch is not None:
        self._process_symbol(lod_items.cvs_branch, cvs_file_items)
      for cvs_tag in lod_items.cvs_tags:
        self._process_symbol(cvs_tag, cvs_file_items)
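
The comment above relies on two properties of a module-level pickle.dump(): every call builds a fresh Pickler, so no memo accumulates across records, and protocol 0 is the ASCII protocol, so the stream stays plain text. A minimal sketch of the same one-record-per-dump streaming pattern, with the reader pulling records back in order (the file name and sample records are made up for illustration):

import pickle

records = [('/repo/file%d,v' % i, {'1.%d' % j: 1000 * i + j for j in range(3)})
           for i in range(5)]

with open('marks_stream.pkl', 'wb') as stream:
    for record in records:
        # One dump() per record: a new Pickler (and an empty memo) each time.
        pickle.dump(record, stream, 0)   # protocol 0 is the ASCII protocol
    stream.flush()

# The consumer reads the stream back one record at a time.
with open('marks_stream.pkl', 'rb') as f:
    for _ in records:
        rcs_path, marks = pickle.load(f)
        print(rcs_path, marks)
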
Example #4
 def rolf(self):
     print "running rolf..."
     for e in self.instanceList:
         self.dup_dict[e.id] = []
         self.dup_dict.get(e.id).append(e.id)
    
     try: 
         for indx, i in enumerate(self.instanceList[0:-2]):                
             for j in self.instanceList[indx+1:]:
                 if(self.getScore(i,j) >= 0.75):
                     self.dup_dict.get(i.id).append(j.id)
                     self.dup_dict.get(j.id).append(i.id)                
     except:
         print "unexpected error occurred : ", sys.exc_info()[0]
     finally:
         with open("dup-dict.obj", 'w') as dumpfile:
             cPickle.dump(self.dup_dict, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)
         f = open("submission-file.txt", 'w')
         for a, values in self.dup_dict.items():
             f.write(a+" ")
             for v in values:
                 f.write(v+" ")
             f.write("\n")
         f.flush()
         f.close()
Example #5
 def save_data(self, filename, data):
     '''
     Saves the data structure using pickle. If the addon data path does 
     not exist it will be automatically created. This save function has
     the same restrictions as the pickle module.
     
     Args:
         filename (string): name of the file you want to save data to. This 
         file will be saved in your addon's profile directory.
         
         data (data object/string): the data you want to save.
         
     Returns:
         True on success
         False on failure
     '''
     profile_path = self.get_profile()
     try:
         os.makedirs(profile_path)
     except:
         pass
     save_path = os.path.join(profile_path, filename)
     try:
         pickle.dump(data, open(save_path, 'wb'))
         return True
     except pickle.PickleError:
         return False
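
One detail worth noting about the method above: pickle.dump(data, open(save_path, 'wb')) leaves closing the handle to the garbage collector. A hedged variant of the same create-directory-then-pickle logic that closes the file deterministically (standalone sketch, names are illustrative):

import os
import pickle

def save_data(profile_path, filename, data):
    """Pickle data under profile_path/filename; return True on success."""
    try:
        os.makedirs(profile_path)
    except OSError:
        pass  # profile directory already exists
    save_path = os.path.join(profile_path, filename)
    try:
        # The with-block guarantees the handle is closed even if dump() fails.
        with open(save_path, 'wb') as f:
            pickle.dump(data, f)
        return True
    except pickle.PickleError:
        return False

print(save_data('addon_profile', 'state.pkl', {'last_run': 42}))
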
Example #6
def saveJob(job, workflow, sandbox, wmTask = None, jobNumber = 0,
            owner = None, ownerDN = None,
            ownerGroup = '', ownerRole = '',
            scramArch = None, swVersion = None, agentNumber = 0 ):
    """
    _saveJob_

    Actually do the mechanics of saving the job to a pickle file
    """
    if wmTask:
        # If we managed to load the task, the spec URL should be valid
        job['spec']     = workflow.spec
        job['task']     = wmTask
        if job.get('sandbox', None) == None:
            job['sandbox'] = sandbox

    job['counter']   = jobNumber
    job['agentNumber'] = agentNumber
    cacheDir         = job.getCache()
    job['cache_dir'] = cacheDir
    job['owner']     = owner
    job['ownerDN']   = ownerDN
    job['ownerGroup']   = ownerGroup
    job['ownerRole']   = ownerRole
    job['scramArch'] = scramArch
    job['swVersion'] = swVersion
    output = open(os.path.join(cacheDir, 'job.pkl'), 'w')
    cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
    output.close()


    return
Example #7
File: db.py Project: CETHop/sage
def save(x, filename, bzip2=False, gzip=False):
    """
    save(x, filename):

    Saves x to a file.  Pretty much the only constraint on x is that
    it have no circular references (it must be Python pickle-able).
    This uses the pickle module, so data you save is *guaranteed*
    to be readable by future versions of Python.

    INPUT:
       x -- almost arbitrary object
       filename -- a string

    OUTPUT:
       Creates a file named filename, from which the object x
       can be reconstructed.
    """

    o=open(filename,"w")
    # Note: don't use protocol 2 here (use 1), since loading doesn't work
    # on my extension types.
    cPickle.dump(x,o,1)
    o.close()
    if bzip2:
        os.system("bzip2 -f %s"%filename)
    if gzip:
        os.system("gzip -f %s"%filename)
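
A related approach, shown here only as a hedged alternative sketch: instead of shelling out to the bzip2/gzip binaries after the fact, the stdlib bz2 and gzip modules can compress while the pickle is being written. The function below is not the Sage API, just an illustration of that substitution.

import bz2
import gzip
import pickle

def save_compressed(x, filename, use_bzip2=False, use_gzip=False):
    """Pickle x to filename, optionally compressing as it is written."""
    if use_bzip2:
        opener, filename = bz2.BZ2File, filename + '.bz2'
    elif use_gzip:
        opener, filename = gzip.open, filename + '.gz'
    else:
        opener = open
    with opener(filename, 'wb') as f:
        pickle.dump(x, f, 1)   # protocol 1, as in the example above

save_compressed({'a': list(range(10))}, 'db_object', use_gzip=True)
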
Example #8
    def __init__(self, descrs, aggFunc='mean', caching=True):
        self.reportMissing = True
        self.caching = caching
        self.cached_file_name = None

        if isinstance(descrs, str):
            self.descrs_file = descrs
            self.descrs = pickle.load(open(self.descrs_file, 'rb'))
            self.cached_file_name = '%s-%s.pkl' % (self.descrs_file, aggFunc)
        elif isinstance(descrs, dict):
            self.descrs = descrs

        if self.caching and self.cached_file_name is not None and os.path.exists(self.cached_file_name):
            self.space = pickle.load(open(self.cached_file_name, 'rb'))
        elif aggFunc in ['mean', 'max']:
            if aggFunc == 'mean':
                f = self.aggMean
            elif aggFunc == 'max':
                f = self.aggMax

            self.space = {}
            for k in self.descrs.keys():
                vecs = self.descrs[k].values()
                if len(vecs) < 2:
                    if self.reportMissing:
                        print('Warning: Not enough vectors for key %s - skipping' % k)
                    continue
                self.space[k] = f(vecs)

            if self.caching and self.cached_file_name is not None:
                pickle.dump(self.space, open(self.cached_file_name, 'wb'))
Example #9
    def getDispersions(self, rescale=True):
        self.cached_dispersions_file = None
        if self.caching and hasattr(self, 'descrs_file'):
            self.cached_dispersions_file = '%s-dispersions.pkl' % (self.descrs_file)
            if os.path.exists(self.cached_dispersions_file):
                self.dispersions = pickle.load(open(self.cached_dispersions_file, 'rb'))
                return

        def disp(M):
            l = len(M)
            d, cnt = 0, 0
            for i in range(l):
                for j in range(i) + range(i+1, l):
                    d += (1 - cosine(M[i], M[j]))
                    cnt += 1
            return d / cnt if cnt != 0 else 0

        self.dispersions = {}
        min_disp, max_disp = 1, 0
        for k in self.descrs:
            imgdisp = disp(self.descrs[k].values())
            self.dispersions[k] = imgdisp
            if imgdisp > max_disp:
                max_disp, max_key = imgdisp, k
            if imgdisp < min_disp:
                min_disp, min_key = imgdisp, k

        # rescale
        if rescale:
            for k in self.dispersions:
                self.dispersions[k] = max(0, min(1, (self.dispersions[k] - min_disp) / (max_disp - min_disp)))

        if self.caching and self.cached_dispersions_file is not None:
            pickle.dump(self.dispersions, open(self.cached_dispersions_file, 'wb'))
Example #10
def create_workload(generator, filename):
    import cPickle
    workload = [sample for sample in generator]

    f = open(filename, 'w')
    cPickle.dump(workload, f, cPickle.HIGHEST_PROTOCOL)
    f.close()
Example #11
 def test_pickle(self):
     """ test that the class can be pickled. This is required! """
     X, Y, Z = self.generate_data(nrows=200)
     task = mmDIFF()
     task.fit(X, Y, Z)
     with tempfile.TemporaryFile(mode='w+b') as tf:
         cPickle.dump(task, tf)
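
The test above only checks that dump() succeeds. A slightly fuller round-trip check rewinds the temporary file and loads the object back; a sketch with a plain dict standing in for the fitted task:

import pickle
import tempfile

task = {'params': [0.1, 0.2], 'fitted': True}   # stand-in for the fitted model

with tempfile.TemporaryFile(mode='w+b') as tf:
    pickle.dump(task, tf)
    tf.seek(0)                       # rewind before reading back
    assert pickle.load(tf) == task
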
Example #12
    def writeBinModels(self, binIdToModels, filename):
        """Save HMM model info for each bin to file."""

        self.logger.info("  Saving HMM info to file.")

        with gzip.open(filename, "wb") as output:
            pickle.dump(binIdToModels, output, pickle.HIGHEST_PROTOCOL)
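
Reading such a gzip-compressed pickle back is symmetric: gzip.open in 'rb' mode hands pickle.load a decompressing file object. A small round-trip sketch (the file name and data are illustrative):

import gzip
import pickle

bin_id_to_models = {'bin_001': ['PF00001', 'PF00002'], 'bin_002': ['PF00005']}

with gzip.open('bin_models.pkl.gz', 'wb') as output:
    pickle.dump(bin_id_to_models, output, pickle.HIGHEST_PROTOCOL)

with gzip.open('bin_models.pkl.gz', 'rb') as f:
    restored = pickle.load(f)
print(restored == bin_id_to_models)   # True
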
Example #13
def get_img2gist():
    try:
        img2gist = None
        with open(name2gist_file, 'rb') as f:
            print 'loading existed img2gist...'
            sys.stdout.flush()
            img2gist = pickle.load(f)
        return img2gist
    except Exception:
        img2gist = {}
        total_num = 0
        with open(train_file_map, 'r') as f:
            for line in f:
                if line.strip():
                    total_num += 1
        count = 0
        with open(train_file_map, 'r') as f:
            for line in f:
                if line.strip():
                    count += 1
                    arr = line.strip().split()
                    name = arr[0].strip()
                    rpath = arr[1].strip()
                    im = Image.open(pjoin(train_images_dir, rpath))
                    im = crop_resize(im, normal_size, True)
                    desc = leargist.color_gist(im)
                    img2gist[name] = desc
                    sys.stdout.write(
                        '%d/%d\r size:(%d, %d)    ' % (count, total_num, im.size[0], im.size[1]))
                    sys.stdout.flush()
        with open(name2gist_file, 'wb') as f:
            pickle.dump(img2gist, f)
        return img2gist
Example #14
    def split(self, dump_sub_results=None, make_sub_outputs=None,
              output_dir=None, output_file_list=None):

        if dump_sub_results is None:
            dump_sub_results = (self.result_dump_file is not None)
        if make_sub_outputs is None:
            make_sub_outputs = self.make_outputs

        if output_dir is None:
            output_dir = self.output_dir

        sub_treatments = [FMRITreatment(d, deepcopy(self.analyser),
                                        make_outputs=make_sub_outputs,
                                        output_dir=output_dir) \
                              for d in self.analyser.split_data(self.data)]

        if output_dir is not None:
            pyhrf.verbose(1, 'Dump sub treatments in: %s ...' %output_dir)
            cmp_size = lambda t1,t2:cmp(t1.data.get_nb_vox_in_mask(),
                                        t2.data.get_nb_vox_in_mask())

            for it, sub_t in enumerate(sorted(sub_treatments, cmp=cmp_size,
                                              reverse=True)):
                if dump_sub_results:
                    sub_t.result_dump_file = op.join(output_dir,
                                                     'result_%04d.pck' %it)
                fn = op.join(output_dir, 'treatment_%04d.pck' %it)
                fout = open(fn, 'w')
                cPickle.dump(sub_t, fout)
                fout.close()
                if output_file_list is not None:
                    output_file_list.append(fn)

        return sub_treatments
Example #15
def export_skin(file_path=None, shapes=None):
    """Exports the skinClusters of the given shapes to disk in a pickled list of skinCluster data.

    :param file_path: Path to export the data.
    :param shapes: Optional list of dag nodes to export skins from.  All descendant nodes will be searched for
    skinClusters also.
    """
    if shapes is None:
        shapes = cmds.ls(sl=True) or []

    # If no shapes were selected, export all skins
    skins = get_skin_clusters(shapes) if shapes else cmds.ls(type='skinCluster')
    if not skins:
        raise RuntimeError('No skins to export.')

    if file_path is None:
        file_path = cmds.fileDialog2(dialogStyle=2, fileMode=0, fileFilter='Skin Files (*{0})'.format(EXTENSION))
        if file_path:
            file_path = file_path[0]
    if not file_path:
        return
    if not file_path.endswith(EXTENSION):
        file_path += EXTENSION

    all_data = []
    for skin in skins:
        skin = SkinCluster(skin)
        data = skin.gather_data()
        all_data.append(data)
        logging.info('Exporting skinCluster %s (%d influences, %d vertices)',
                     skin.node, len(data['weights'].keys()), len(data['blendWeights']))
    fh = open(file_path, 'wb')
    pickle.dump(all_data, fh, pickle.HIGHEST_PROTOCOL)
    fh.close()
Example #16
def process_and_save(filename):
	"""
	Little script to do reading, selecting of the right data, getting 
	it in the right structure and then pickling it.
	This is supposed to make reading a lot faster when the light cones 
	are to be made.
	"""
	
	picklename = filename+'.pickled'
		
	# First check if the pickled version doesn't already exist
	if '/' in filename:
		dir = os.listdir(filename.rsplit('/', 1)[0])
	else: dir = os.listdir('.')
	
	if picklename.rsplit('/', 1)[1] in dir:
		print "Pickled version already exists for", filename
		return False
	
	data = read_bolshoi(filename, nopickle=True)
	if not data: return None
	
	with open(picklename, 'w') as picklefile:
		cPickle.dump(data, picklefile)
	
	return True
Example #17
def prep_test_data():
    test_file = './test/input_test_data.txt'
    output_test_file = './test/input_test.pkl'
     
    max_l = 100
    test_data = read_data_file(test_file, max_l)
    cPickle.dump(test_data, open(output_test_file, "wb"))
Example #18
def save(model, timings, post_fix=""):
    print "Saving the model..."

    # ignore keyboard interrupt while saving
    start = time.time()
    s = signal.signal(signal.SIGINT, signal.SIG_IGN)

    model.save(
        model.state["save_dir"] + "/" + model.state["run_id"] + "_" + model.state["prefix"] + post_fix + "model.npz"
    )
    cPickle.dump(
        model.state,
        open(
            model.state["save_dir"]
            + "/"
            + model.state["run_id"]
            + "_"
            + model.state["prefix"]
            + post_fix
            + "state.pkl",
            "w",
        ),
    )
    numpy.savez(
        model.state["save_dir"] + "/" + model.state["run_id"] + "_" + model.state["prefix"] + post_fix + "timing.npz",
        **timings
    )
    signal.signal(signal.SIGINT, s)

    print "Model saved, took {}".format(time.time() - start)
Example #19
	def set(self, key, value, timeout=None, version=None):
		key = self.make_key(key, version=version)
		self.validate_key(key)

		fname = self._key_to_file(key)
		dirname = os.path.dirname(fname)

		if timeout is None:
			timeout = self.default_timeout

		self._cull()

		try:
			if not os.path.exists(dirname):
				os.makedirs(dirname)

			f = open(fname, 'wb')
			try:
				now = time.time()
				pickle.dump(now + timeout, f, pickle.HIGHEST_PROTOCOL)
				pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
			finally:
				f.close()
		except (IOError, OSError):
			pass
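
A cache entry written by set() above is two consecutive pickles in one file, the expiry timestamp followed by the value, so a reader must call load() twice in the same order. A minimal sketch of both sides (the file name and value are illustrative):

import pickle
import time

fname = 'cache_entry.djcache'
value = {'user': 'alice', 'hits': 3}

with open(fname, 'wb') as f:
    pickle.dump(time.time() + 300, f, pickle.HIGHEST_PROTOCOL)  # expiry time
    pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)              # payload

with open(fname, 'rb') as f:
    expiry = pickle.load(f)
    if expiry < time.time():
        print('entry expired')
    else:
        print(pickle.load(f))
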
Example #20
def extract_point_cloud(tracks, loc, R):
    locations = []
    directions = []
    track_ids = []
    for track_idx in range(0, len(tracks)):
        track = tracks[track_idx]
        for pt_idx in range(0, len(track.utm)):
            pt = track.utm[pt_idx]
            if pt[0]>=loc[0]-R and pt[0]<=loc[0]+R and \
                    pt[1]>=loc[1]-R and pt[1]<=loc[1]+R:
                locations.append((pt[0], pt[1]))
               
                dir1 = np.array((0.0, 0.0))
                if pt_idx > 0:
                    dir1 = np.array((track.utm[pt_idx][0]-track.utm[pt_idx-1][0], track.utm[pt_idx][1]-track.utm[pt_idx-1][1]))

                dir2 = np.array((0.0, 0.0)) 
                if pt_idx < len(track.utm) - 1:
                    dir2 = np.array((track.utm[pt_idx+1][0]-track.utm[pt_idx][0], track.utm[pt_idx+1][1]-track.utm[pt_idx][1]))

                direction = dir1 + dir2
                
                direction_norm = np.linalg.norm(direction)
                if direction_norm > 1.0:
                    direction /= direction_norm
                else:
                    direction *= 0.0
                
                directions.append(direction)
                track_ids.append(track_idx)
    
    point_cloud = PointCloud(locations, directions, track_ids)
    with open("test_point_cloud.dat", "wb") as fout:
        cPickle.dump(point_cloud, fout, protocol=2)
Example #21
    def load(self, filename):
        """Optimized load and return the parsed version of filename.

        Uses the on-disk parse cache if the file is located in it.

        """
        # Compute sha1 hash (key)
        with open(filename) as fp:
            key = sha1(fp.read()).hexdigest()
        path = self.key_to_path(key)
        # Return the cached file if available
        if key in self.hashes:
            try:
                with open(path) as fp:
                    return cPickle.load(fp)
            except EOFError:
                os.unlink(path)
                self.hashes.remove(key)
            except IOError:
                self.hashes.remove(key)
        # Create the nested cache directory
        try:
            os.makedirs(os.path.dirname(path))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
        # Process the file and save in the cache
        scratch = kurt.Project.load(filename)  # can fail
        with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT,
                               0400), 'w') as fp:
            # open file for writing but make it immediately read-only
            cPickle.dump(scratch, fp, cPickle.HIGHEST_PROTOCOL)
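
The cache above keys entries by the SHA-1 of the file's contents, so identical inputs share one cached parse regardless of path. A stripped-down sketch of the same hash-then-cache pattern with a generic parse callable (directory layout and names are illustrative, and the original's error handling is omitted):

import hashlib
import os
import pickle

CACHE_DIR = 'parse_cache'

def cached_parse(filename, parse):
    """Return parse(filename), memoized on disk under the content hash."""
    with open(filename, 'rb') as fp:
        key = hashlib.sha1(fp.read()).hexdigest()
    path = os.path.join(CACHE_DIR, key[:2], key)
    if os.path.exists(path):
        with open(path, 'rb') as fp:
            return pickle.load(fp)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    result = parse(filename)              # may fail; nothing is cached then
    with open(path, 'wb') as fp:
        pickle.dump(result, fp, pickle.HIGHEST_PROTOCOL)
    return result

# e.g. cached_parse('project.sb', kurt.Project.load) in the setting above
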
Example #22
    def serialize(self, data, id):
        if self.__pid != 0:
            return

        self.__currentID = id

        # In-memory case
        if self.__fileName is None:
            self.__inMemorySerializedData = zlib.compress(cPickle.dumps(data, -1))
            self.__pid = -1
            return

        # File case
        pid = os.fork()
        if pid != 0:
            self.__pid = pid
            return

        try:
            tmpFile = self.__fileName + '.tmp'
            with open(tmpFile, 'wb') as f:
                with gzip.GzipFile(fileobj=f) as g:
                    cPickle.dump(data, g, -1)
            os.rename(tmpFile, self.__fileName)
            os._exit(0)
        except:
            os._exit(-1)
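
Writing to a .tmp file and renaming it into place, as serialize() does, makes the on-disk pickle appear atomically, so readers never see a half-written file. A minimal sketch of just that part, without the fork (names are illustrative; os.replace is the Python 3 spelling, os.rename plays the same role in Python 2):

import gzip
import os
import pickle

def atomic_gzip_pickle(data, file_name):
    """Write data as a gzip-compressed pickle, then swap it in atomically."""
    tmp_file = file_name + '.tmp'
    with open(tmp_file, 'wb') as f:
        with gzip.GzipFile(fileobj=f) as g:
            pickle.dump(data, g, -1)     # -1 selects the highest protocol
    os.replace(tmp_file, file_name)

atomic_gzip_pickle({'state': [1, 2, 3]}, 'checkpoint.pkl.gz')
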
Example #23
def all_training_examples_cached():
    global _all_examples
    if _all_examples is None:
        try:
            _all_examples, cnt = cPickle.load(myopen(training_examples_cache_filename()))
            assert len(_all_examples) == cnt
            logging.info("Successfully read %d training examples from %s" % (cnt, training_examples_cache_filename()))
            logging.info(stats())
        except:
            logging.info("(Couldn't read training examples from %s, sorry)" % (training_examples_cache_filename()))
            logging.info("Caching all training examples...")
            logging.info(stats())
            _all_examples = []
            for l1, l2, f1, f2, falign in bicorpora_filenames():
                for e in get_training_biexample(l1, l2, f1, f2, falign):
                    _all_examples.append(e)
                    if len(_all_examples) % 10000 == 0:
                        logging.info("\tcurrently have read %d training examples" % len(_all_examples))
                        logging.info(stats())
            random.shuffle(_all_examples)
            logging.info("...done caching all %d training examples" % len(_all_examples))
            logging.info(stats())

            cnt = len(_all_examples)
            cPickle.dump((_all_examples, cnt), myopen(training_examples_cache_filename(), "wb"), protocol=-1)
            assert len(_all_examples) == cnt
            logging.info("Wrote %d training examples to %s" % (cnt, training_examples_cache_filename()))
            logging.info(stats())
    assert _all_examples is not None
    return _all_examples
Example #24
def pickle_dump(data, filename):
    """
    Equivalent to pickle.dump(data, open(filename, 'wb'))
    but closes the file to prevent filehandle leakage.
    """
    with open(filename, 'wb') as fh:
        pickle.dump(data, fh)
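
A matching load helper completes the pair; like the wrapper above it uses a with-block so the handle is closed rather than leaked (sketch):

import pickle

def pickle_load(filename):
    """
    Counterpart to pickle_dump: load and return the object stored in
    filename, closing the file afterwards.
    """
    with open(filename, 'rb') as fh:
        return pickle.load(fh)
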
Example #25
    def fit(self, X, y, valid_X=None, valid_y=None):
        input_size = X.shape[1]
        output_size = len(np.unique(y))
        X_sym = T.matrix('x')
        y_sym = T.ivector('y')
        self.layers_ = []
        self.layer_sizes_ = [input_size]
        self.layer_sizes_.extend(self.hidden_layer_sizes)
        self.layer_sizes_.append(output_size)
        self.dropout_layers_ = []
        self.training_scores_ = []
        self.validation_scores_ = []
        self.training_loss_ = []
        self.validation_loss_ = []

        if not hasattr(self, 'fit_function'):
            self._setup_functions(X_sym, y_sym,
                                  self.layer_sizes_)

        batch_indices = list(range(0, X.shape[0], self.batch_size))
        if X.shape[0] != batch_indices[-1]:
            batch_indices.append(X.shape[0])

        start_time = time.clock()
        itr = 0
        best_validation_score = np.inf
        while (itr < self.max_iter):
            print("Starting pass %d through the dataset" % itr)
            itr += 1
            batch_bounds = list(zip(batch_indices[:-1], batch_indices[1:]))
            # Random minibatches
            self.random_state.shuffle(batch_bounds)
            for start, end in batch_bounds:
                self.partial_fit(X[start:end], y[start:end])
            current_training_score = (self.predict(X) != y).mean()
            self.training_scores_.append(current_training_score)
            current_training_loss = self.loss_function(X, y)
            self.training_loss_.append(current_training_loss)
            # Serialize each save_frequency iteration
            if (itr % self.save_frequency) == 0 or (itr == self.max_iter):
                f = open(self.model_save_name + "_snapshot.pkl", 'wb')
                cPickle.dump(self, f, protocol=2)
                f.close()
            if valid_X is not None:
                current_validation_score = (
                    self.predict(valid_X) != valid_y).mean()
                self.validation_scores_.append(current_validation_score)
                current_training_loss = self.loss_function(valid_X, valid_y)
                self.validation_loss_.append(current_training_loss)
                print("Validation score %f" % current_validation_score)
                # if we got the best validation score until now, save
                if current_validation_score < best_validation_score:
                    best_validation_score = current_validation_score
                    f = open(self.model_save_name + "_best.pkl", 'wb')
                    cPickle.dump(self, f, protocol=2)
                    f.close()
        end_time = time.clock()
        print("Total training time ran for %.2fm" %
              ((end_time - start_time) / 60.))
        return self
Example #26
    def selective_search_IJCV_roidb(self):
        """
        Return the database of selective search regions of interest.
        Ground-truth ROIs are also included.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path,
                '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'.
                format(self.name, self.config['top_k']))

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print '{} ss roidb loaded from {}'.format(self.name, cache_file)
            return roidb

        gt_roidb = self.gt_roidb()
        ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb)
        roidb = datasets.imdb.merge_roidbs(gt_roidb, ss_roidb)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote ss roidb to {}'.format(cache_file)

        return roidb
Example #27
def unigram_selection(i,path_to_training_set,path_to_pickle):

 (X_train, y_train, X_test, y_test,number_training, number_testing)= extract_data.extract_training_and_testing_set(
    path_to_training_set+'metrics_training_set_%d.data'%i,
    path_to_training_set+'metrics_testing_set_%d.data'%i)

 print(X_train[0].__len__())
 clf=svm.SVC(C=1, cache_size=2000, class_weight=None, coef0=0.0, degree=3,
 gamma=0.1, kernel='linear', max_iter=-1, probability=False, shrinking=True,
 tol=0.001, verbose=False)

 clf.fit(X_train, y_train)


 print ()
 print("Detailed classification report:")
 print()
 print("The model is trained on the full development set: %d" % number_training)
 print("The scores are computed on the full evaluation set: %d" % number_testing)
 print()
 y_true=y_test
 y_prediction=clf.predict(X_test)

 print(metrics.classification_report(y_true, y_prediction))

 clf_metrics=np.vstack((y_true,y_prediction))

 with open(path_to_pickle+'60000_all_features_%d.pkl'%i, 'wb') as fid :
    cPickle.dump(clf_metrics, fid)
 print()
Example #28
def create_cache_file(
    recid, uid, record="", cache_dirty=False, pending_changes=[], disabled_hp_changes={}, undo_list=[], redo_list=[]
):
    """Create a BibEdit cache file, and return revision and record. This will
    overwrite any existing cache the user has for this record.

    """
    if not record:
        record = get_bibrecord(recid)
        # Order subfields alphabetically after loading the record
        record_order_subfields(record)
        if not record:
            return

    file_path = "%s.tmp" % _get_file_path(recid, uid)
    record_revision = get_record_last_modification_date(recid)
    if record_revision == None:
        record_revision = datetime.now().timetuple()

    cache_file = open(file_path, "w")
    assert_undo_redo_lists_correctness(undo_list, redo_list)
    cPickle.dump(
        [cache_dirty, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list], cache_file
    )
    cache_file.close()
    return record_revision, record
Example #29
def get_qual_stats_dict(quals_dict, output_file_path = None, verbose = True):
    """This function takes quals dict (which can be obtained by calling the
       utils.utils.get_quals_dict function) and returns a dictionary that
       simply contains the summary of quality scores per location in the
       alignment"""

    # FIXME: get_quals_dict and get_qual_stats_dict functions are only for
    #        454 technology at this moment.

    progress = Progress()
    progress.verbose = verbose
    progress.new('Summary of quality scores per column is being computed')
    
    qual_stats_dict = {}
    alignment_length = len(quals_dict[quals_dict.keys()[0]])
    for pos in range(0, alignment_length):
        progress.update('Position: %d of %d' % (pos + 1, alignment_length))

        qual_stats_dict[pos] = {}
        quals_for_pos = [q[pos] for q in quals_dict.values() if q[pos]]
        if not quals_for_pos:
            qual_stats_dict[pos] = None
            continue
        qual_stats_dict[pos]['mean']  = np.mean(quals_for_pos)
        qual_stats_dict[pos]['std']   = np.std(quals_for_pos)
        qual_stats_dict[pos]['max']   = np.max(quals_for_pos)
        qual_stats_dict[pos]['min']   = np.min(quals_for_pos)
        qual_stats_dict[pos]['count'] = len(quals_for_pos)
    
    if output_file_path:
        cPickle.dump(quals_dict, open(output_file_path, 'w'))

    progress.end()
    return qual_stats_dict
Example #30
    def saveSettings(self, *args):

        #this function saves the preferences the user has set in the UI to disk
        settingsLocation = self.mayaToolsDir + "/General/Scripts/projectSettings.txt"

        try:
            f = open(settingsLocation, 'w')


            #create a dictionary with  values
            settings = {}
            settings["UseSourceControl"] = cmds.checkBox(self.widgets["useSourceControl"], q = True, v = True)
            settings["FavoriteProject"] = cmds.optionMenu(self.widgets["favoriteProject_OM"], q = True, v = True)

            #write our dictionary to file
            cPickle.dump(settings, f)
            f.close()

        except:
            cmds.confirmDialog(title = "Error", icon = "critical", message = settingsLocation + " is not writeable. Please make sure this file is not set to read only.")



        #close the UI
        cmds.deleteUI("AnimationRiggingTool_SettingsUI")
Example #31
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from Functions import print_progress
import cPickle

datafile = raw_input("Pandas dataframe to open: ")
dataname = raw_input("Data savename: ")
X = pd.read_hdf("%s.h5" % datafile)
y = X.pop('Target 0')
y = y.astype(int)

mean_scores_l = list()
errors = list()
progress = 0
n_tree_range = range(1, 25)
print_progress(progress, len(n_tree_range), prefix='Progress', suffix='Complete', bar_length=50)
for i in n_tree_range:
    dtree = RandomForestClassifier(n_estimators=i)
    scores = cross_val_score(dtree, X, y, cv=10, scoring='accuracy')
    mean_scores_l.append(np.mean(scores))
    errors.append(np.std(scores))
    progress += 1
    print_progress(progress, len(n_tree_range), prefix='Progress', suffix='Complete', bar_length=50)

print mean_scores_l
print errors
data = zip(mean_scores_l, errors)
with open('%s.p' % dataname, 'wb') as f:
    cPickle.dump(data, f)
Example #32
def dump_vocabulary():
    """ Write the word ID map, passed as a parameter. """
    logger.info("Writing vocabulary to %s..." % config.VOCABULARY_FILE)
    with open(config.VOCABULARY_FILE, 'wb') as f:
        cPickle.dump(words, f)
Example #33
def test_DBN(
        finetune_lr=0.1,
        pretraining_epochs=10,  # TODO 100+
        pretrain_lr=0.01,
        k=1,
        training_epochs=42,  # TODO 100+
        dataset=DATASET,
        batch_size=12):
    """

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    ###datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, pca_whiten=True, cv_frac=0.0)
    datasets = load_data(dataset,
                         nframes=N_FRAMES,
                         unit=False,
                         normalize=True,
                         pca_whiten=False,
                         cv_frac=0.1)
    # unit=False because we don't want the [0-1] binary RBM projection
    # normalize=True because we want the data to be 0 centered with 1 variance.
    # pca_whiten=True because we want the data to be decorrelated

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    print "train_set_x.shape.eval()", train_set_x.shape.eval()
    assert (train_set_x.shape[1].eval() == N_FRAMES * 39)  # check
    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=train_set_x.shape[1].eval(),
              hidden_layers_sizes=[300, 300, 300],
              n_outs=62 * 3)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)  # TODO
                if i == 0:
                    tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN
                c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
        with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f:
            cPickle.dump(dbn, f)
        print "dumped a partially pre-trained model"

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    #with open('dbn_Gaussian_gpu_layer_2.pickle') as f:
    #    dbn = cPickle.load(f)

    ###datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, cv_frac=0.2)
    ### # unit=False because we don't want the [0-1] binary RBM projection
    ### # normalize=True because we want the data to be 0 centered with 1 variance.
    ###train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = None, None, None, None, None, None
    ###train_set_x, train_set_y = datasets[0]
    ###valid_set_x, valid_set_y = datasets[1]
    ###test_set_x, test_set_y = datasets[2]
    ###n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    print "number of training (fine-tuning) batches", n_train_batches
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    with open(output_file_name + '.pickle', 'w') as f:
                        cPickle.dump(dbn, f)

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
Example #34
def train(data_opt, train_opt):
    print 'Initializing data provider...'
    dp_tr = MusicDataProvider(data_opt['dir_path'], 'train',
                              data_opt['pitch_range'], data_opt['dt'],
                              data_opt['batch_size'])
    dp_vl = MusicDataProvider(data_opt['dir_path'],
                              'valid',
                              data_opt['pitch_range'],
                              data_opt['dt'],
                              data_opt['batch_size'],
                              shuffle=False)

    print 'Done. Total training batches:', dp_tr.get_batch_num()

    print 'initializing parameters...'
    input_dim = dp_tr.get_data_dims()
    print 'Input feature dimension:', input_dim

    params = param_init('train',
                        train_opt,
                        input_dim,
                        isAdp=train_opt['isAdp'])
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(params[kk].astype(np.float64), name=kk)

    print 'Building model...'
    f_grad, f_update = build_model_train(tparams, train_opt)
    print[(k, tparams[k].get_value().shape) for k in tparams.keys()]

    print 'Begin training...'
    uidx = 0
    prev_log_like = -np.inf
    omega = np.array([
        i * 2 * np.pi / train_opt['dim_feq']
        for i in range(train_opt['dim_feq'])
    ])
    for eidx in range(train_opt['max_epoch']):
        for bidx in range(dp_tr.get_batch_num()):
            [epoch, batch, [data, nframe]] = dp_tr.get_next_batch()

            online_cost = f_grad(data, nframe, omega)
            f_update(train_opt['lrate'])
            if bidx % 5 == 0:
                print 'batch {}-{}: {}'.format(eidx, bidx, online_cost)
                sys.stdout.flush()

            if uidx > 0 and uidx % train_opt['test_freq'] == 0:
                print '{} minibatch trained. Begin testing...'.format(uidx)
                test_log = 0
                test_cnt = 0
                for test_bidx in range(dp_vl.get_batch_num()):
                    [epoch, batch, [data, nframe]] = dp_vl.get_next_batch()
                    log_like = eval_batch(data, nframe, tparams, omega,
                                          train_opt)

                    test_log += np.sum(log_like)
                    test_cnt += nframe.shape[0]
                    if test_bidx % 10 == 0:
                        print 'batch-{} tested: {}'.format(
                            test_bidx, test_log / test_cnt)
                        sys.stdout.flush()
                test_log_like = test_log / test_cnt
                print 'Batch {}-{}, test {} samples, accuracy: {}'.format(
                    eidx, bidx, test_cnt, test_log_like)

                if test_log_like > prev_log_like:
                    print 'Best parameter so far found. Saving...'
                    param = unzip(tparams)
                    fo = open(train_opt['save_dir'], 'wb')
                    pickle.dump({
                        'param': param,
                        'log': test_log_like
                    },
                                fo,
                                protocol=pickle.HIGHEST_PROTOCOL)
                    fo.close()
                    prev_log_like = test_log_like
            uidx += 1
Example #35
 def __dump(self, terms):
     from cPickle import dump
     print terms
     fp = open("plugins/extras/testData/glossary.glossary-terms", "w")
     dump(terms, fp)
     fp.close()
Example #36
validationLabels = loader.loadData('myDataset/dev_label.txt')
testSentences = loader.loadData('myDataset/test.txt')
testLabels = loader.loadData('myDataset/test_label.txt')

# TRAIN THE MODEL
print '...training the DCNN'
for epoch in range(NUMOFEPOCHS):
    for i in xrange(len(trainingSentences)):
        trainDCNN(np.asarray(trainingSentences[i:i+1], dtype = np.int32), 
            np.asarray(trainingLabels[i], dtype = np.int32))
        print 'Sentence ', i, ' complete.'

# SAVE THE TRAINED MODEL
parameters = lasagne.layers.get_all_param_values(output)
with open('DCNNParameters.pkl', 'wb') as file:
    cPickle.dump(parameters, file, protocol = 2)

# VALIDATE THE MODEL
print '...running the DCNN on Validation Set'

accuracy = 0
for i in xrange(len(validationSentences)):
    score = validateDCNN(np.asarray(validationSentences[i:i+1], dtype = np.int32), 
        np.asarray(validationLabels[i], dtype = np.int32))
    accuracy += score
    print 'Sentence ', i, ' complete.'

accuracy /= float(len(validationSentences))
print "Accuracy in Validation =", accuracy

Example #37
def saveState():
	try:
		cPickle.dump(state, file(_stateFilename, "wb"))
	except:
		log.debugWarning("Error saving state", exc_info=True)
Example #38
def GetBias(config, filename_results_direct, filename_results_reconv):
    """
    @brief load results, calculate the slope and offset for bias (g1_direct-g1_reconv) vs g1_reconv
    @param config dict used to create the simulations
    @param filename_results_direct results file for the direct
    @param filename_results_reconv results file for the reconv
    @return dict with fields c1,m1,c2,m2,c1_std,c2_std,m1_std,m2_std
    Errors on m and c are empty for now.
    """

    name1 = os.path.basename(filename_results_reconv).replace(
        'results', '').replace('yaml', '').replace('cat',
                                                   '').replace('..', '.')
    name1 = name1.strip('.')
    name2 = os.path.basename(filename_results_direct).replace(
        'results', '').replace('yaml', '').replace('cat',
                                                   '').replace('..', '.')
    name2 = name2.strip('.')

    filename_pickle = 'results.%s.%s.pickle' % (name1, name2)
    import cPickle as pickle

    if os.path.isfile(filename_pickle):
        logging.info('using existing results file %s' % filename_pickle)
        pickle_dict = pickle.load(open(filename_pickle))
        bias_moments_list = pickle_dict['moments']
        bias_hsmcorr_list = pickle_dict['hsmcorr']

    else:
        logging.info('file %s not found, analysing results' % filename_pickle)

        # get number of shears, angles and galaxies, useful later
        n_shears = config['reconvolution_validation_settings']['n_shears']
        n_angles = config['reconvolution_validation_settings']['n_angles']
        n_gals = config['reconvolution_validation_settings']['n_gals']

        # initialise lists for results
        bias_moments_list = []
        bias_hsmcorr_list = []

        # load results
        results_direct = numpy.loadtxt(filename_results_direct)
        results_reconv = numpy.loadtxt(filename_results_reconv)

        # check if ring test is complete, we should have n_angles results for each galaxy and each shear
        for gi in range(n_gals):

            # initialise lists for results and truth
            moments_reconv_G1 = []
            moments_reconv_G2 = []
            hsmcorr_reconv_G1 = []
            hsmcorr_reconv_G2 = []
            moments_direct_G1 = []
            moments_direct_G2 = []
            hsmcorr_direct_G1 = []
            hsmcorr_direct_G2 = []
            true_G1 = []
            true_G2 = []

            # this will count how many shear we are using
            n_used_shears = 0

            # loop over shears
            for si in range(n_shears):

                # calculate indices of galaxies which belong to this ring test
                start_id = gi * si
                end_id = gi * si + n_angles

                # select galaxies from this ring
                select_reconv = numpy.logical_and(
                    results_reconv[:, 0] >= start_id,
                    results_reconv[:, 0] < end_id)
                select_direct = numpy.logical_and(
                    results_direct[:, 0] >= start_id,
                    results_direct[:, 0] < end_id)

                # count how many galaxies we got
                n_found_angles_reconv = sum(select_reconv)
                n_found_angles_direct = sum(select_direct)

                # initialise the variable which will tell us if to skip this shear
                skip_shear = False

                # do not include shear which has missing data
                if (n_found_angles_reconv !=
                        n_angles) or (n_found_angles_direct != n_angles):
                    skip_shear = True

                # do not include the shear which has an error in one of the angles
                for col in range(1, 7):
                    if any(results_reconv[select_reconv, col].astype(int) ==
                           HSM_ERROR_VALUE) or any(results_direct[
                               select_direct,
                               col].astype(int) == HSM_ERROR_VALUE):
                        skip_shear = True

                # continue with loop if bad ring
                if skip_shear:
                    logging.warning(
                        'gal %d shear %d has HSM errors or missing data- skipping'
                        % (gi, si))
                    continue

                # increment the number of used shears
                n_used_shears += 1

                # get the shear from the ring
                moments_reconv_G1.append(
                    numpy.mean(results_reconv[select_reconv, 1]))
                moments_reconv_G2.append(
                    numpy.mean(results_reconv[select_reconv, 2]))
                hsmcorr_reconv_G1.append(
                    numpy.mean(results_reconv[select_reconv, 3]))
                hsmcorr_reconv_G2.append(
                    numpy.mean(results_reconv[select_reconv, 4]))
                moments_direct_G1.append(
                    numpy.mean(results_direct[select_direct, 1]))
                moments_direct_G2.append(
                    numpy.mean(results_direct[select_direct, 2]))
                hsmcorr_direct_G1.append(
                    numpy.mean(results_direct[select_direct, 3]))
                hsmcorr_direct_G2.append(
                    numpy.mean(results_direct[select_direct, 4]))
                true_G1.append(config['reconvolved_images']['gal']['shear']
                               ['items'][si]['g1'])
                true_G2.append(config['reconvolved_images']['gal']['shear']
                               ['items'][si]['g2'])

            # convert to numpy
            moments_reconv_G1 = numpy.asarray(moments_reconv_G1)
            moments_reconv_G2 = numpy.asarray(moments_reconv_G2)
            hsmcorr_reconv_G1 = numpy.asarray(hsmcorr_reconv_G1)
            hsmcorr_reconv_G2 = numpy.asarray(hsmcorr_reconv_G2)
            moments_direct_G1 = numpy.asarray(moments_direct_G1)
            moments_direct_G2 = numpy.asarray(moments_direct_G2)
            hsmcorr_direct_G1 = numpy.asarray(hsmcorr_direct_G1)
            hsmcorr_direct_G2 = numpy.asarray(hsmcorr_direct_G2)
            true_G1 = numpy.asarray(true_G1)
            true_G2 = numpy.asarray(true_G2)

            # get the shear bias for moments
            c1, m1, cov1 = _getLineFit(true_G1,
                                       moments_direct_G1 - moments_reconv_G1,
                                       numpy.ones(moments_direct_G1.shape))
            c2, m2, cov2 = _getLineFit(true_G2,
                                       moments_direct_G2 - moments_reconv_G2,
                                       numpy.ones(moments_direct_G2.shape))

            # create result dict
            bias_moments = {
                'c1': c1,
                'm1': m1,
                'c2': c2,
                'm2': m2,
                'c1_std': 0.,
                'c2_std': 0.,
                'm1_std': 0.,
                'm2_std': 0.
            }

            # get the shear bias for hsmcorr
            c1, m1, cov1 = _getLineFit(true_G1,
                                       hsmcorr_direct_G1 - hsmcorr_reconv_G1,
                                       numpy.ones(hsmcorr_direct_G1.shape))
            c2, m2, cov2 = _getLineFit(true_G2,
                                       hsmcorr_direct_G2 - hsmcorr_reconv_G2,
                                       numpy.ones(hsmcorr_direct_G2.shape))

            # create result dict
            bias_hsmcorr = {
                'c1': c1,
                'm1': m1,
                'c2': c2,
                'm2': m2,
                'c1_std': 0.,
                'c2_std': 0.,
                'm1_std': 0.,
                'm2_std': 0.
            }

            if config['debug']:
                name1 = os.path.basename(filename_results_reconv).replace(
                    'results', '').replace('yaml',
                                           '').replace('cat', '').replace(
                                               'reconvolution_validation', '')
                name1 = name1.strip('.')
                name2 = os.path.basename(filename_results_direct).replace(
                    'results',
                    '').replace('yaml', '').replace('cat', '').replace(
                        'reconvolution_validation', '').replace('..', '.')
                name2 = name2.strip('.')

                filename_fig = 'fig.linefit.%s.%s.%03d.png' % (name1, name2,
                                                               gi)
                import pylab
                pylab.figure(figsize=(10, 5))
                pylab.plot(true_G1, moments_direct_G1 - moments_reconv_G1,
                           'bx')
                pylab.plot(true_G2, moments_direct_G2 - moments_reconv_G2,
                           'rx')
                pylab.plot(true_G1,
                           true_G1 * bias_moments['m1'] + bias_moments['c1'],
                           'b-')
                pylab.plot(true_G2,
                           true_G2 * bias_moments['m2'] + bias_moments['c2'],
                           'r-')
                x1, x2, y1, y2 = pylab.axis()
                pylab.axis((min(true_G1) * 1.1, max(true_G1) * 1.1, y1, y2))
                pylab.xlabel('true_Gi')
                pylab.ylabel('moments_direct_G1-moments_reconv_G1')
                pylab.legend(['G1', 'G2'])
                pylab.savefig(filename_fig)
                pylab.close()
                logging.info('saved figure %s' % filename_fig)

            logging.info(
                'gal %3d used %3d shears, m1 = % 2.3e, m2=% 2.3e ' %
                (gi, n_used_shears, bias_moments['m1'], bias_moments['m2']))

            # append the results list
            bias_moments_list.append(bias_moments)
            bias_hsmcorr_list.append(bias_hsmcorr)

        # may want to scatter plot the m1,m2 of all galaxies in the results file
        if config['debug']:
            name1 = os.path.basename(filename_results_reconv).replace(
                'results',
                '').replace('yaml',
                            '').replace('cat',
                                        '').replace('reconvolution_validation',
                                                    '')
            name1 = name1.strip('.')
            name2 = os.path.basename(filename_results_direct).replace(
                'results',
                '').replace('yaml',
                            '').replace('cat',
                                        '').replace('reconvolution_validation',
                                                    '').replace('..', '.')
            name2 = name2.strip('.')
            filename_fig = 'fig.mscatter.%s.%s.png' % (name1, name2)
            m1_list = numpy.asarray([b['m1'] for b in bias_moments_list])
            m2_list = numpy.asarray([b['m2'] for b in bias_moments_list])

            pylab.figure()
            pylab.scatter(m1_list, m2_list)
            pylab.savefig(filename_fig)
            pylab.close()

        pickle_dict = {
            'moments': bias_moments_list,
            'hsmcorr': bias_hsmcorr_list
        }
        pickle.dump(pickle_dict, open(filename_pickle, 'w'), protocol=2)
        logging.info('saved %s' % filename_pickle)

    return bias_moments_list, bias_hsmcorr_list
Example #39
    def plot(self):
        """Plot chart"""
        Preferences = ConfigParser()
        Preferences.read("psyrc")

        self.diagrama2D.axes2D.clear()
        self.diagrama2D.config()
        filename = "%i.pkl" % P
        if os.path.isfile(filename):
            with open(filename, "r") as archivo:
                data = cPickle.load(archivo)
                self.status.setText("Loading cached data...")
                QApplication.processEvents()
        else:
            self.progressBar.setVisible(True)
            self.status.setText("Calculating data, be patient...")
            QApplication.processEvents()
            data = PsyCoolprop.calculatePlot(self)
            cPickle.dump(data, open(filename, "w"))
            self.progressBar.setVisible(False)
        self.status.setText("Plotting...")
        QApplication.processEvents()

        tmax = Preferences.getfloat("Psychr", "isotdbEnd") - 273.15

        t = [ti - 273.15 for ti in data["t"]]
        Hs = data["Hs"]
        format = {}
        format["ls"] = Preferences.get("Psychr", "saturationlineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "saturationlineWidth")
        format["color"] = Preferences.get("Psychr", "saturationColor")
        format["marker"] = Preferences.get("Psychr", "saturationmarker")
        format["markersize"] = 3
        self.diagrama2D.plot(t, Hs, **format)

        format = {}
        format["ls"] = Preferences.get("Psychr", "isotdblineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "isotdblineWidth")
        format["color"] = Preferences.get("Psychr", "isotdbColor")
        format["marker"] = Preferences.get("Psychr", "isotdbmarker")
        format["markersize"] = 3
        for i, T in enumerate(t):
            self.diagrama2D.plot([T, T], [0, Hs[i]], **format)

        H = data["H"]
        th = data["th"]
        format = {}
        format["ls"] = Preferences.get("Psychr", "isowlineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "isowlineWidth")
        format["color"] = Preferences.get("Psychr", "isowColor")
        format["marker"] = Preferences.get("Psychr", "isowmarker")
        format["markersize"] = 3
        for i, H in enumerate(H):
            self.diagrama2D.plot([th[i], tmax], [H, H], **format)

        format = {}
        format["ls"] = Preferences.get("Psychr", "isohrlineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "isohrlineWidth")
        format["color"] = Preferences.get("Psychr", "isohrColor")
        format["marker"] = Preferences.get("Psychr", "isohrmarker")
        format["markersize"] = 3
        for Hr, H0 in data["Hr"].iteritems():
            self.diagrama2D.plot(t, H0, **format)
            self.drawlabel("isohr", Preferences, t, H0, Hr, "%")

        format = {}
        format["ls"] = Preferences.get("Psychr", "isotwblineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "isotwblineWidth")
        format["color"] = Preferences.get("Psychr", "isotwbColor")
        format["marker"] = Preferences.get("Psychr", "isotwbmarker")
        format["markersize"] = 3
        for T, (H, Tw) in data["Twb"].iteritems():
            self.diagrama2D.plot(Tw, H, **format)
            value = T - 273.15
            txt = u"ºC"
            self.drawlabel("isotwb", Preferences, Tw, H, value, txt)

        format = {}
        format["ls"] = Preferences.get("Psychr", "isochorlineStyle")
        format["lw"] = Preferences.getfloat("Psychr", "isochorlineWidth")
        format["color"] = Preferences.get("Psychr", "isochorColor")
        format["marker"] = Preferences.get("Psychr", "isochormarker")
        format["markersize"] = 3
        for v, (Td, H) in data["v"].iteritems():
            self.diagrama2D.plot(Td, H, **format)
            value = v
            txt = u"m³/kg"
            self.drawlabel("isochor", Preferences, Td, H, value, txt)

        self.diagrama2D.draw()
        self.status.setText("P = %i Pa" % P)
Example #40
 def DumpPickleRunsEventTimes(self):
     if len(self.runs_evets_times) > 0:
         picklepath = '{d}/{f}.pkl'.format(d=self.currents_logs_dir, f=self.testbeam_log_file_name)
         pickle.dump(self.runs_evets_times, open(picklepath, 'wb'))
         print 'Created pickle file with the event times for all the runs in the testbeam'
Example #41
    if recompute_index:
        print 'building an index for faster search...'
        for pid in db:
            p = db[pid]
            dict_title = makedict(p['title'], forceidf=5, scale=3)
            dict_authors = makedict(' '.join(x['name'] for x in p['authors']),
                                    forceidf=5)
            if 'and' in dict_authors:
                # special case for "and" handling in authors list
                del dict_authors['and']
            dict_summary = makedict(p['summary'])
            SEARCH_DICT[pid] = merge_dicts(
                [dict_title, dict_authors, dict_summary])
        # and cache it in file
        print 'writing search_dict.p as cache'
        pickle.dump(SEARCH_DICT, open('search_dict.p', 'wb'))
    else:
        print 'loading cached index for faster search...'
        SEARCH_DICT = pickle.load(open('search_dict.p', 'rb'))

    # start
    if args.prod:
        # run on Tornado instead, since running raw Flask in prod is not recommended
        print 'starting tornado!'
        from tornado.wsgi import WSGIContainer
        from tornado.httpserver import HTTPServer
        from tornado.ioloop import IOLoop
        from tornado.log import enable_pretty_logging
        enable_pretty_logging()
        http_server = HTTPServer(WSGIContainer(app))
        http_server.listen(args.port)
Example #42
def save_pickle(data, data_path):
    with open(data_path, "w") as f:
        pickle.dump(data, f)
Example #43

if __name__ == '__main__':
    try:
        which = sys.argv[1]
        writer = {
            'megam': feat_writer.megam_writer,
            'crfsuite': feat_writer.crfsuite_writer
        }[which]
        out_dir = sys.argv[2]
        train_in, dev_in, test_in = sys.argv[3:6]

        colloc = sys.argv[6]
    except:
        print 'Usage: {} which(=megam|crfsuite) out_dir train dev test colloc'.format(
            sys.argv[0])
        exit(1)

    with open(colloc) as f:
        COLLOCS = set(f.read().split())
        MAX_COLLOCS_LEN = max([len(i.split('_')) for i in COLLOCS])

    for (purpose, path) in zip(["train", "dev", "test"],
                               [train_in, dev_in, test_in]):
        with open(path) as fi:
            with open(out_dir + '/' + purpose + '.' + which, 'w') as fo:
                writer(iter_features(common.lazy_load_dyads(fi)), fo)

    with open(out_dir + '/' + 'map.' + which, 'w') as f:
        cPickle.dump(LABEL_ID, f)
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 6 array of detections in
    #    (x1, y1, x2, y2, score, depth)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        depths = roidb[i]['depths']
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where((scores[:, j] > thresh[j])
                            & (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_depths = depths[inds]

            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            cls_depths = cls_depths[top_inds]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_depths[:])) \
                    .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
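
The adaptive per-class threshold in test_net above is driven by a min-heap holding at most max_per_set scores; a minimal sketch of that idea in isolation (function name is hypothetical):

import heapq

def update_class_threshold(top_scores, new_scores, max_per_set):
    """Push new scores onto the min-heap, trim it to max_per_set, and return the running threshold."""
    for s in new_scores:
        heapq.heappush(top_scores, s)
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)
    # until the heap is full, accept everything (threshold of -inf)
    return top_scores[0] if len(top_scores) == max_per_set else float('-inf')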
Example #45
 def save_model(self):
     # save the network
     with open(self.save_file, "wb") as f:
         cPickle.dump(self.G, f)
     return
        -1, config["test"]["number_of_episodes"], is_train=False
    )
    test_episodes = eval_result[0]
    test_successful_episodes = eval_result[1]
    test_collision_episodes = eval_result[2]
    test_max_len_episodes = eval_result[3]
    print_state(
        "validation episodes",
        test_episodes,
        test_successful_episodes,
        test_collision_episodes,
        test_max_len_episodes,
    )

    with open(
        os.path.join(test_completed_trajectories_dir, "final_status.txt"), "w"
    ) as final_message_file:
        validation_rate = test_successful_episodes / float(test_episodes)
        final_message_file.write("final validation rate is {}".format(validation_rate))
        final_message_file.flush()

    test_results.append((-1, test_episodes, test_successful_episodes))

    rollout_manager.end()

test_results_file = os.path.join(
    test_completed_trajectories_dir, "test_results.test_results_pkl"
)
with bz2.BZ2File(test_results_file, "w") as compressed_file:
    pickle.dump(test_results, compressed_file)
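
A hypothetical counterpart for reading back the bz2-compressed results written above:

import bz2
import pickle

def load_test_results(path):
    """Load the pickled test_results list from a bz2-compressed file."""
    with bz2.BZ2File(path, 'r') as compressed_file:
        return pickle.load(compressed_file)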
Example #47
 def save(self, path):
     with open(path, 'w') as outfile:
         pickle.dump(self, outfile)
Example #48
def get_scores(model_name):
    scores = []
    base_path = "/home/ml/mgrena/mod-target/pgn/pretrained_model_tf1.2.1/mod-target1375"
    decoded_path = os.path.join(base_path, "decoded")
    reference_path = os.path.join(base_path, "reference")

    if len(os.listdir(decoded_path)) != len(os.listdir(reference_path)):
        raise ValueError(
            "Number of reference summaries and decoded summaries do not match")

    num_articles = len(os.listdir(reference_path))
    print(num_articles)
    if not os.path.exists("decoded_tmp"):
        os.mkdir("decoded_tmp")
    if not os.path.exists("reference_tmp"):
        os.mkdir("reference_tmp")
    if not os.path.exists("temp-files"):
        os.mkdir("temp-files")

    # Hacky housecleaning: pyrouge writes large temporary files and offers no way to turn that off, so we redirect them here and delete the directory afterwards
    tempfile.tempdir = os.path.join(os.getcwd(), "temp-files")

    for i in range(0, num_articles):
        # File names
        decoded_filename = str(i).rjust(6, '0') + "_decoded.txt"
        reference_filename = str(i).rjust(6, '0') + "_reference.txt"

        # Copy files over to temp folder
        copyfile(os.path.join(decoded_path, decoded_filename),
                 os.path.join("decoded_tmp/", decoded_filename))
        copyfile(os.path.join(reference_path, reference_filename),
                 os.path.join("reference_tmp/", reference_filename))

        # ROUGE object
        r = Rouge155()
        r._system_dir = 'decoded_tmp/'
        r._model_dir = 'reference_tmp/'

        r.system_filename_pattern = '(\d+)_decoded.txt'
        r.model_filename_pattern = '#ID#_reference.txt'

        output = r.convert_and_evaluate()
        output_dict = r.output_to_dict(output)

        essential_keys = [
            'rouge_1_f_score', 'rouge_2_f_score', 'rouge_3_f_score',
            'rouge_l_f_score'
        ]
        essential_dict = {key: output_dict[key] for key in essential_keys}
        scores.append(essential_dict)

        # Remove temp files
        os.unlink("decoded_tmp/" + decoded_filename)
        os.unlink("reference_tmp/" + reference_filename)

    # Pickle final results
    pickle_out = open(model_name + ".pic", "wb")
    pickle.dump(scores, pickle_out)
    pickle_out.close()

    # Delete excessive log files
    print("Removing temp files")
    rmtree(path="temp-files")
    rmtree(path="decoded_tmp/")
    rmtree(path="reference_tmp/")
Example #49
def pickle_save(contact, filepath):  # Save the data object to a file using the pickle module
    f = open(filepath, 'w')
    pickle.dump(contact, f)
    f.close()
Example #50
def main():
    print 'Start...'

    input_file = raw_input('Input file: ')
    # input_file = 'input/me_at_the_zoo.in'
    file_name = input_file.split('/')[-1].split('.')[0]

    pickle_files_path = {
        'infos':
        pickles_files_path("infos", file_path=file_name),
        'videos_sizes':
        pickles_files_path("videos_sizes", file_path=file_name),
        'endpoints_objects':
        pickles_files_path("endpoints_objects", file_path=file_name),
        'calculation_objects':
        pickles_files_path("calculation_objects", file_path=file_name),
    }

    f = open(input_file, 'r')
    f_list = f.readlines()

    first_line = f_list.pop(0)[:-1].split(' ')

    # ask whether to load data from the cache files or from the input file
    use_cached_data = (raw_input('Use cached data? (y/n): ') == 'y')
    if exists('tmp/' + file_name) and not use_cached_data:
        shutil.rmtree('tmp/' + file_name)

    if not exists('tmp/' + file_name):
        mkdir('tmp/' + file_name)

    if isfile(pickle_files_path['infos']):
        infos = pickle.load(open(pickle_files_path['infos'], "rb"))
    else:

        infos = {
            'n_videos': int(first_line[0]),
            'n_endpoints': int(first_line[1]),
            'n_request_descr': int(first_line[2]),
            'n_caches': int(first_line[3]),
            'caches_size': int(first_line[4])
        }

        pickle.dump(infos, open(pickle_files_path['infos'], "wb"))

    data = None
    table_ep_requests = None
    endpoints_latency_data_center = None
    table_ep_cchs = None

    # try to load data from the cache files; if they don't exist, read the data from the input file
    try:
        videos_sizes = pickle.load(
            open(pickle_files_path['videos_sizes'], "rb"))

        data = np.load(pickle_files_path['endpoints_objects'])

        if not all(key in data.keys()
                   for key in ('table_endpoints_requests',
                               'endpoints_latency_data_center',
                               'table_endpoints_caches')):
            data.close()
            raise IOError

        endpoints_latency_data_center = data['endpoints_latency_data_center']
        table_ep_cchs = data['table_endpoints_caches']
        table_ep_requests = data['table_endpoints_requests']

        data.close()

        print 'Load from cache!'

    except IOError:
        print 'Data not in cache, prepare to read file...'

        # array with videos sizes
        # size -> (1D) #videos
        videos_sizes = map(int, f_list.pop(0)[:-1].split(' '))

        pickle.dump(videos_sizes, open(pickle_files_path['videos_sizes'],
                                       "wb"))

        # array with latencies from endpoints to data center
        # access with endpoint id
        # size -> (1D) #endpoints
        endpoints_latency_data_center = np.zeros(shape=infos['n_endpoints'])

        # table relating each endpoint's latency to each cache
        # size -> (2D) lines=#endpoints | columns=#caches
        table_ep_cchs = np.zeros(shape=(infos['n_endpoints'],
                                        infos['n_caches']))

        # go over all endpoints:
        #   read the latency from the endpoint to the datacenter and save it in endpoints_latency_data_center
        #   read the latency from that endpoint to each cache and save it in table_ep_cchs
        for i in range(0, infos['n_endpoints']):
            endpoint_info = f_list.pop(0)[:-1].split(' ')
            endpoints_latency_data_center[i] = int(endpoint_info[0])
            for j in range(0, int(endpoint_info[1])):
                cache = f_list.pop(0)[:-1].split(' ')
                table_ep_cchs[i, int(cache[0])] = int(cache[1])

        print "Reading...\n"

        # table relating video requests to the endpoint they come from
        # size -> (2D) lines=#endpoints | columns=#videos
        table_ep_requests = np.zeros(shape=(infos['n_endpoints'],
                                            infos['n_videos']))

        # go over all request descriptions
        #   read request information
        #   skip the request if its video is larger than the cache size
        #   otherwise save the #requests of that video from the specific endpoint in table_ep_requests
        for i in range(0, int(infos['n_request_descr'])):
            videos_info = f_list.pop(0)[:-1].split(' ')
            video_id = int(videos_info[0])
            if videos_sizes[video_id] > infos['caches_size']:
                continue
            table_ep_requests[int(videos_info[1]),
                              int(videos_info[0])] = int(videos_info[2])

        # cache the prepared arrays to disk
        np.savez(pickle_files_path['endpoints_objects'],
                 endpoints_latency_data_center=endpoints_latency_data_center,
                 table_endpoints_caches=table_ep_cchs,
                 table_endpoints_requests=table_ep_requests)

    print 'Data loaded!'

    if use_cached_data:
        data = np.load(pickle_files_path['calculation_objects'])
        matrix_caches_requests = data['matrix_caches_requests']
    else:
        matrix_caches_requests = np.zeros(shape=(infos['n_videos'],
                                                 infos['n_caches']),
                                          dtype='int')

        total_latency_dataCenter_matrix = table_ep_requests * np.transpose(
            endpoints_latency_data_center)[:, None]

        print 'Start calculations...'

        for i in range(0, table_ep_requests.shape[1]):
            x = table_ep_requests[:, i]
            latency_dataCenter = total_latency_dataCenter_matrix[:, i]
            tmp_matrix = (latency_dataCenter[:, None]
                          - table_ep_cchs * x[:, None])
            matrix_caches_requests[i, :] = np.sum(tmp_matrix, axis=0)

        print 'Almost done...'
        np.savez(pickle_files_path['calculation_objects'],
                 matrix_caches_requests=matrix_caches_requests)

    tmp_matrix = (-matrix_caches_requests).argsort(axis=None, kind='mergesort')
    tmp_matrix = np.unravel_index(tmp_matrix, matrix_caches_requests.shape)
    index_matrix_caches_requests_sorted = np.vstack(tmp_matrix).T

    del tmp_matrix
    del matrix_caches_requests

    import gc
    gc.collect()

    caches_ocup_size = np.zeros(infos['n_caches'])
    caches_videos_id = [[] for i in range(infos['n_caches'])]

    print 'Just some more calculations...'

    # print table_ep_requests_cpy
    while True:
        request_cache = index_matrix_caches_requests_sorted[0]
        if index_matrix_caches_requests_sorted.shape[0] <= 1:
            break
        index_matrix_caches_requests_sorted = index_matrix_caches_requests_sorted[
            1:]
        ep_of_cch = np.nonzero(table_ep_cchs[:, request_cache[1]])[0]
        # print ep_of_cch
        v_reqs = np.nonzero(table_ep_requests[:, request_cache[0]])[0]
        # print v_reqs
        v_reqs_to_rm = np.intersect1d(ep_of_cch, v_reqs, assume_unique=True)
        # print v_reqs_to_rm

        if v_reqs_to_rm.size <= 0:
            # print 'ok ;-)'
            continue
        # print table_ep_requests_cpy[:, request_cache[1]]
        if caches_ocup_size[request_cache[1]] + videos_sizes[
                request_cache[0]] <= infos['caches_size']:
            caches_ocup_size[request_cache[1]] += videos_sizes[
                request_cache[0]]
            (caches_videos_id[request_cache[1]]).append(request_cache[0])
            table_ep_requests[:, request_cache[0]][v_reqs_to_rm] = 0
            # print table_ep_requests_cpy[:, request_cache[1]]

    print 'Writing output...'

    f_out = open('output/' + file_name + '.out', 'w')
    f_out.write(str(len(caches_ocup_size)) + '\n')
    for i in range(0, len(caches_videos_id)):
        f_out.write(str(i))
        for videos_id in caches_videos_id[i]:
            f_out.write(' ' + str(videos_id))
        f_out.write('\n')
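
A hypothetical sanity check on the assignment built above: no cache may hold more video data than its capacity.

def check_cache_capacity(caches_videos_id, videos_sizes, cache_size):
    """Assert that the videos assigned to each cache fit within cache_size."""
    for cache_id, video_ids in enumerate(caches_videos_id):
        used = sum(videos_sizes[v] for v in video_ids)
        assert used <= cache_size, 'cache %d over capacity' % cache_id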
Example #51
 def write(self):
     log_fid = open(self.log_file,'w')
     cPickle.dump(self, log_fid)
     log_fid.close()
Example #52
def save_binary_pickle(data, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)
Example #53
 def _dump(att, dat):
     pickle.dump(tuple(getattr(self, a) for a in att), dat, -1)
Example #54
 def serialise(obj, f):
     pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def saveParams(para, fname):
    f = file(fname, 'wb')
    cPickle.dump(para, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
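
A hypothetical counterpart to saveParams above for reading the parameters back:

import cPickle

def loadParams(fname):
    """Load parameters written by saveParams."""
    with open(fname, 'rb') as f:
        return cPickle.load(f)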
def kitti_eval(detpath,
               annopath,
               imagesetfile,
               classname,
               cachedir,
               ovthresh=0.5,
               use_07_metric=False,
               imagepath=None):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print 'Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames))
        # save
        print 'Saving cached annotations to {:s}'.format(cachefile)
        with open(cachefile, 'w') as f:
            cPickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'r') as f:
            recs = cPickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    n = 0
    vis_gt_ex = False
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        npos = npos + len(bbox)
        det = [False] * len(R)
        class_recs[imagename] = {'bbox': bbox,
                                 'det': det}

        if n < 10 and len(bbox) > 0 and vis_gt_ex == True:
            im = cv2.imread(imagepath.format(imagename))
            vis_detections(im, classname, class_recs[imagename]['bbox'], thresh=0.1)
            n += 1
    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]



    vis_det_ex = True
    if vis_det_ex:
        image_ids_to_idx = {}
        for idx,img_id in enumerate(image_ids):
            if img_id in image_ids_to_idx.keys():
                image_ids_to_idx[img_id] += [idx]
            else:
                image_ids_to_idx[img_id] = [idx]

        n = 10
        ids = np.random.permutation(len(BB))[:n]
        for i,idx in enumerate(ids): # we want "n" unique images
            # we need to grab "class_recs[image_ids[d]]"
            print(i,idx)
            image_id = image_ids[idx]
            # now find all the indicies with the given image_id
            image_idx = image_ids_to_idx[image_id]
            bbox = BB[image_idx,:]
            conf = -1*sorted_scores[image_idx]
            print(conf)
            bboxes = np.concatenate((bbox,conf[:,np.newaxis]),axis=1)
            if len(bboxes) > 0:
                im = cv2.imread(imagepath.format(image_id))
                vis_detections(im, classname, bboxes, thresh=0.20,name="vis_det_{}.png")
                n += 1
            else:
                print("ohno!")
                sys.exit(1)

    ovthresh = [0.5,0.75,0.95]
    nd = len(image_ids)
    tp = np.zeros((nd,len(ovthresh)))
    fp = np.zeros((nd,len(ovthresh)))
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)
        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)
            # print("-=-=-=-=-=-=-=-=-=-=-=-")
            # print(overlaps)
            # print(sorted_scores)
        # if(sorted_scores[d] >= -0.5):
        #     continue
        #print(sorted_scores[d],sorted_scores[d] < -0.0)
        inside_any = False
        for idx in range(len(ovthresh)):
            if ovmax > ovthresh[idx]:
                if not R['det'][jmax]:
                    inside_any = True
                    tp[d,idx] = 1.
                    #print("tp")
                else:
                    fp[d,idx] = 1.
                    #print("fp")
            else:
                fp[d,idx] = 1.
                #print("fp")

        if inside_any is True:
            R['det'][jmax] = 1

    rec = np.zeros((len(fp),len(ovthresh)))
    prec = np.zeros((len(fp),len(ovthresh)))
    ap = np.zeros(len(ovthresh))
    for idx in range(len(ovthresh)):
        # compute precision recall
        _fp = np.cumsum(fp[:,idx])
        _tp = np.cumsum(tp[:,idx])
        rec[:,idx] = _tp / float(npos)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec[:,idx] = _tp / np.maximum(_tp + _fp, np.finfo(np.float64).eps)
        #ap = voc_ap(rec, prec, use_07_metric)
        ap[idx] = voc_ap(rec[:,idx], prec[:,idx], classname, False)

    #print(fp,tp,rec,prec,ap,npos)
    return rec, prec, ap, ovthresh
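
Hypothetical usage of kitti_eval above, reporting AP at each of the three IoU thresholds it evaluates; the paths and class name are placeholders for illustration only:

detpath = 'results/{}.txt'          # placeholder detection-results template
annopath = 'annotations/{}.xml'     # placeholder annotation template
imagesetfile = 'image_set.txt'      # placeholder image list
cachedir = 'anno_cache'             # placeholder annotation cache dir

rec, prec, ap, ovthresh = kitti_eval(detpath, annopath, imagesetfile,
                                     'Car', cachedir)
for th, class_ap in zip(ovthresh, ap):
    print 'AP@{:.2f}: {:.4f}'.format(th, class_ap)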
Example #57
#!/usr/bin/python2
#filename:pickling.py

import cPickle as p

shoplistfile='shoplist.data'

shoplist=['apple','mango','carrot']

f=file(shoplistfile,'w')
p.dump(shoplist,f)
f.close()

del shoplist

f=file(shoplistfile)
storelist=p.load(f)
print storelist
def get_data(text_only):
    #text_only = False

    if text_only:
        print("Text only")
        image_list = []
    else:
        print("Text and image")
        image_list = read_image()

    train_data = write_data("train", image_list, text_only)
    validate_data = write_data("validate", image_list, text_only)
    test_data = write_data("test", image_list, text_only)

    print("loading data...")
    # w2v_file = '../Data/GoogleNews-vectors-negative300.bin'
    vocab, all_text = load_data(train_data, validate_data, test_data)
    # print(str(len(all_text)))

    print("number of sentences: " + str(len(all_text)))
    print("vocab size: " + str(len(vocab)))
    max_l = len(max(all_text, key=len))
    print("max sentence length: " + str(max_l))

    #
    #
    word_embedding_path = "../Data/weibo/w2v.pickle"

    w2v = pickle.load(open(word_embedding_path, 'rb'))
    # print(temp)
    # #
    print("word2vec loaded!")
    print("num words already in word2vec: " + str(len(w2v)))
    # w2v = add_unknown_words(w2v, vocab)
    # file_path = "../Data/weibo/event_clustering.pickle"
    # if not os.path.exists(file_path):
    #     train = []
    #     for l in train_data["post_text"]:
    #         line_data = []
    #         for word in l:
    #             line_data.append(w2v[word])
    #         line_data = np.matrix(line_data)
    #         line_data = np.array(np.mean(line_data, 0))[0]
    #         train.append(line_data)
    #     train = np.array(train)
    #     cluster = AgglomerativeClustering(n_clusters=15, affinity='cosine', linkage='complete')
    #     cluster.fit(train)
    #     y = np.array(cluster.labels_)
    #     pickle.dump(y, open(file_path, 'wb+'))
    # else:
    # y = pickle.load(open(file_path, 'rb'))
    # print("Event length is " + str(len(y)))
    # center_count = {}
    # for k, i in enumerate(y):
    #     if i not in center_count:
    #         center_count[i] = 1
    #     else:
    #         center_count[i] += 1
    # print(center_count)
    # train_data['event_label'] = y

    #
    print("word2vec loaded!")
    print("num words already in word2vec: " + str(len(w2v)))
    add_unknown_words(w2v, vocab)
    W, word_idx_map = get_W(w2v)
    # # rand_vecs = {}
    # # add_unknown_words(rand_vecs, vocab)
    W2 = rand_vecs = {}
    w_file = open("../Data/weibo/word_embedding.pickle", "wb")
    pickle.dump([W, W2, word_idx_map, vocab, max_l], w_file)
    w_file.close()
    return train_data, validate_data, test_data
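
A hypothetical read-back of the embedding bundle written by get_data above:

import pickle

with open("../Data/weibo/word_embedding.pickle", "rb") as f:
    W, W2, word_idx_map, vocab, max_l = pickle.load(f)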
Example #59
 def save(self, filename):
     with open(filename, "wb") as fout:
         cPickle.dump(self, fout, protocol=2)
Example #60
    for row in reader:
        if ind==0:
            dataheader.append(row)
            ind+=1
        else:
            if(row[2]=='Training'):
                yTrain.append(int(row[0]))
                XTrain.append([int(j) for j in row[1].split()])
            elif(row[2]=='PublicTest'):
                yValid.append(int(row[0]))
                XValid.append([int(j) for j in row[1].split()])
            else:
                yTest.append(int(row[0]))
                XTest.append([int(j) for j in row[1].split()])


XTrain=np.array(XTrain)
XTest=np.array(XTest)
XValid=np.array(XValid)


pickle.dump(XTrain,open('XTrain','wb'))
pickle.dump(yTrain,open('yTrain','wb'))
pickle.dump(XValid,open('XValid','wb'))
pickle.dump(yValid,open('yValid','wb'))
pickle.dump(XTest,open('XTest','wb'))
pickle.dump(yTest,open('yTest','wb'))
pickle.dump(Emotion,open('emotionList','wb'))
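
A hypothetical counterpart for loading the arrays dumped above:

import pickle

XTrain = pickle.load(open('XTrain', 'rb'))
yTrain = pickle.load(open('yTrain', 'rb'))
XValid = pickle.load(open('XValid', 'rb'))
yValid = pickle.load(open('yValid', 'rb'))
XTest = pickle.load(open('XTest', 'rb'))
yTest = pickle.load(open('yTest', 'rb'))
Emotion = pickle.load(open('emotionList', 'rb'))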