def test_pers_load(self):
    for binary in [True, False]:
        src = StringIO()
        p = cPickle.Pickler(src)
        p.persistent_id = persistent_id
        p.binary = binary
        value = MyData('abc')
        p.dump(value)

        up = cPickle.Unpickler(StringIO(src.getvalue()))
        up.persistent_load = persistent_load
        res = up.load()
        self.assertEqual(res.value, value.value)

        # errors
        src = StringIO()
        p = cPickle.Pickler(src)
        p.persistent_id = persistent_id
        p.binary = binary
        value = MyData('abc')
        p.dump(value)

        up = cPickle.Unpickler(StringIO(src.getvalue()))
        # exceptions vary between cPickle & pickle
        try:
            up.load()
            self.assertUnreachable()
        except Exception:
            pass
def main(feats_path, n_comps):
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    labels = {
        name: vector
        for name, vector in labels.items() if vector is not None
    }
    names = list(labels.keys())
    vectors = list(labels.values())

    print('[INFO] Conducting PCA on ' + feats_path + ' with ' +
          str(n_comps) + ' components')
    pca = PCA(n_components=n_comps)
    vectors = pca.fit_transform(vectors)

    # save reduced vectors
    base = path.basename(feats_path)
    name = path.splitext(base)[0]
    output = name + '_ncomps' + str(n_comps) + '.pickle'
    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(dict(zip(names, vectors)), handle)
def restore_model(self, path="tnt_pos_tagger.pic"):
    # open the given path (not a hardcoded name) in binary mode for unpickling
    object_path = open(path, 'rb')
    tag_object = cPickle.Unpickler(object_path)
    tagger = tag_object.load()
    object_path.close()
    return tagger
def main(feats_path):
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    labels = {
        name: vector
        for name, vector in labels.items() if vector is not None
    }
    features = np.asarray(list(labels.values()))

    print('[INFO] Conducting t-SNE on ' + feats_path)
    tsne = TSNE(metric='braycurtis',
                verbose=1,
                n_iter=5000,
                random_state=42,
                n_jobs=-1)
    projection = tsne.fit_transform(features)

    # save reduced vectors
    base = path.basename(feats_path)
    name = path.splitext(base)[0]
    output = name + '_tsne.pickle'
    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(projection, handle)
def main(label_path):
    # connection to mongodb
    mongoConn = MongoClient(csh_db_cfg.DB_HOST + ":" + str(csh_db_cfg.DB_PORT))
    cshTransDB = mongoConn[csh_db_cfg.TRANSCRIPTION_DB_NAME]
    cshTransDB.authenticate(csh_db_cfg.TRANSCRIPTION_DB_USER,
                            csh_db_cfg.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_cfg.TRANS_DB_MeetingMinColl]

    print('[INFO] Loading saved cluster labels from ' + label_path)
    with open(label_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    print('[INFO] Saving cluster labels to MongoDB')
    for name, label in labels.items():
        # set 0th cluster to noise
        if label == 0:
            label = -1
        searchQuery = {'anonymizedImageFile': name}
        updateQuery = {'$set': {'cluster': str(label)}}
        cshCollection.find_one_and_update(searchQuery, updateQuery)
def main(feat_path, dict_path):
    print('[INFO] Working...')

    # load image PHOCs
    print('[INFO] Loading image PHOCs from \'' + feat_path + '\'')
    with open(feat_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        image_phocs = unpickler.load()

    # load dictionary PHOCs
    print('[INFO] Loading dictionary (word) PHOCs from \'' + dict_path + '\'')
    with open(dict_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        dict_phocs = unpickler.load()

    words = list(dict_phocs.keys())
    word_phocs = np.array(list(dict_phocs.values()))

    # initialize NearestNeighbors learner
    print('[INFO] Fitting nearest neighbors learner')
    n_neighbors = 5
    nn = NearestNeighbors(n_neighbors=n_neighbors,
                          algorithm='auto',
                          metric=distance.braycurtis,
                          n_jobs=-1)
    nn = nn.fit(word_phocs)

    while True:
        # get user input
        image_q = input('[INPUT] Input an image to predict text for: ')
        if image_q not in image_phocs:
            print('[INFO] Query image not found, please try another.')
            continue
        phoc_q = image_phocs[image_q].reshape(1, -1)

        # calculate Bray-Curtis dissimilarities and find nearest neighbors
        print('[INFO] Finding top candidate predictions...')
        dist, ind = nn.kneighbors(phoc_q)
        dist, ind = dist[0][::-1], ind[0][::-1]

        # present results
        print('[INFO] Results:')
        for i in range(n_neighbors):
            print('{0}\t{1}'.format(words[ind[i]], dist[i]))
def test_persistent_load(self):
    class MyData(object):
        def __init__(self, value):
            self.value = value

    def persistent_id(obj):
        if hasattr(obj, 'value'):
            return 'MyData: %s' % obj.value
        return None

    def persistent_load(id):
        return MyData(id[8:])

    for binary in [True, False]:
        src = StringIO()
        p = cPickle.Pickler(src)
        p.persistent_id = persistent_id
        p.binary = binary
        value = MyData('abc')
        p.dump(value)

        up = cPickle.Unpickler(StringIO(src.getvalue()))
        up.persistent_load = persistent_load
        res = up.load()
        self.assertEqual(res.value, value.value)

        # errors
        src = StringIO()
        p = cPickle.Pickler(src)
        p.persistent_id = persistent_id
        p.binary = binary
        value = MyData('abc')
        p.dump(value)

        up = cPickle.Unpickler(StringIO(src.getvalue()))
        # exceptions vary between cPickle & pickle
        try:
            up.load()
            AssertUnreachable()
        except Exception:
            pass
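# A minimal Python 3 sketch of the same persistent-ID round trip as the test
# above, assuming a MyData class like the one defined there. cPickle and the
# 'binary' attribute no longer exist in Python 3, so the hooks are supplied by
# subclassing pickle.Pickler/Unpickler instead of assigning attributes.
import io
import pickle

class Py3DataPickler(pickle.Pickler):
    def persistent_id(self, obj):
        # route objects with a 'value' attribute through the persistent-ID channel
        if hasattr(obj, 'value'):
            return 'MyData: %s' % obj.value
        return None

class Py3DataUnpickler(pickle.Unpickler):
    def persistent_load(self, pid):
        # strip the 'MyData: ' prefix to rebuild the object
        return MyData(pid[8:])

# src = io.BytesIO()
# Py3DataPickler(src).dump(MyData('abc'))
# restored = Py3DataUnpickler(io.BytesIO(src.getvalue())).load()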
def __init__(self, save_file_name, gffutils_db, multimappers_dict):
    logger.info("Loading read assignments from " + save_file_name)
    assert os.path.exists(save_file_name)
    self.save_file_name = save_file_name
    self.unpickler = pickle.Unpickler(open(save_file_name, "rb"), fix_imports=False)
    self.current_gene_info_obj = None
    self.is_updated = False
    self.gffutils_db = gffutils_db
    self.multimapped_reads = multimappers_dict
def unpickleData(fname):
    f = gzip.open(fname)
    u = pickle.Unpickler(f)
    data = []
    try:
        while True:
            rec = u.load()
            data.append(rec)
    except EOFError:
        pass
    f.close()
    return data
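# Hedged usage sketch for unpickleData above: repeated pickle.dump calls append
# records to one gzipped stream, and the EOFError loop reads them all back.
# 'records.pkl.gz' is a hypothetical file name used only for illustration.
import gzip
import pickle

def write_records_example(fname='records.pkl.gz'):
    with gzip.open(fname, 'wb') as f:
        for rec in ({'id': 0}, {'id': 1}, {'id': 2}):
            pickle.dump(rec, f)

# write_records_example()
# print(unpickleData('records.pkl.gz'))  # [{'id': 0}, {'id': 1}, {'id': 2}]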
def load_read_info(self, dump_filename):
    gc.disable()
    if not self.multimapped_reads:
        multimap_unpickler = pickle.Unpickler(open(dump_filename + "_multimappers", "rb"),
                                              fix_imports=False)
        while True:
            try:
                obj = multimap_unpickler.load()
                if isinstance(obj, list):
                    assignment_list = obj
                    read_id = assignment_list[0].read_id
                    self.multimapped_reads[read_id] = assignment_list
                else:
                    raise ValueError("Multimap assignment file {} is corrupted!".format(dump_filename))
            except EOFError:
                break

    info_unpickler = pickle.Unpickler(open(dump_filename + "_info", "rb"),
                                      fix_imports=False)
    total_assignments = info_unpickler.load()
    polya_assignments = info_unpickler.load()
    gc.enable()
    return total_assignments, polya_assignments
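# Hedged sketch of the writer side implied by load_read_info: one pickle per
# assignment list in "<dump>_multimappers", then the two counters in
# "<dump>_info". The function name and call site are assumptions for
# illustration only; the real dump routine is not shown in the source.
import pickle

def dump_read_info(dump_filename, multimapped_reads, total_assignments, polya_assignments):
    with open(dump_filename + "_multimappers", "wb") as f:
        pickler = pickle.Pickler(f, fix_imports=False)
        for assignment_list in multimapped_reads.values():
            pickler.dump(assignment_list)
    with open(dump_filename + "_info", "wb") as f:
        pickler = pickle.Pickler(f, fix_imports=False)
        pickler.dump(total_assignments)
        pickler.dump(polya_assignments)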
def main(feat_path, images_path):
    print('[INFO] Working...')

    # similarity threshold
    threshold = 0.6

    # load feature vectors
    print('[INFO] Loading features from ' + feat_path)
    with open(feat_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        index = unpickler.load()

    while True:
        results = {}

        # get user input
        image_q = input('[INPUT] Input an image to compare: ')
        if image_q not in index:
            print('[INFO] Query image not found, please try another.')
            continue
        feature_q = index[image_q]

        # calculate Bray-Curtis dissimilarities
        print('[INFO] Calculating Bray Curtis dissimilarities...')
        for image_n, feature_n in index.items():
            score = 1 - distance.braycurtis(feature_n, feature_q)
            if score > threshold:
                results[image_n] = score
        del results[image_q]
        results = [(image_n, results[image_n])
                   for image_n in sorted(results, key=results.get, reverse=True)]

        # present results
        print('[INFO] Results:')
        for image_n, score in results:
            print('{0}\t{1}'.format(image_n, score))
            img = cv2.imread(images_path + '/' + image_n, 0)
            cv2.imshow(image_n, img)
            k = cv2.waitKey(0) & 0xFF
            cv2.destroyAllWindows()
            # press ESC to exit
            if k == 27:
                break
def load_assigned_reads(save_file_name, gffutils_db, multimapped_reads):
    gc.disable()
    logger.info("Loading read assignments from " + save_file_name)
    assert os.path.exists(save_file_name)
    unpickler = pickle.Unpickler(open(save_file_name, "rb"), fix_imports=False)
    read_storage = []
    current_gene_info = None

    while True:
        try:
            obj = unpickler.load()
            if isinstance(obj, ReadAssignment):
                read_assignment = obj
                assert current_gene_info is not None
                read_assignment.gene_info = current_gene_info
                if read_assignment.read_id in multimapped_reads:
                    resolved_assignment = None
                    for a in multimapped_reads[read_assignment.read_id]:
                        if a.start == read_assignment.start() and a.end == read_assignment.end() and \
                                a.gene_id == current_gene_info.gene_db_list[0].id and \
                                a.chr_id == read_assignment.chr_id:
                            if resolved_assignment is not None:
                                logger.warning("Duplicate read: %s %s %s" %
                                               (read_assignment.read_id, a.gene_id, a.chr_id))
                            resolved_assignment = a
                    if not resolved_assignment:
                        logger.warning("Incomplete information on read %s" % read_assignment.read_id)
                    elif resolved_assignment.assignment_type == ReadAssignmentType.noninformative:
                        continue
                    else:
                        read_assignment.assignment_type = resolved_assignment.assignment_type
                        read_assignment.multimapper = resolved_assignment.multimapper
                read_storage.append(read_assignment)
            elif isinstance(obj, GeneInfo):
                if current_gene_info and read_storage:
                    yield current_gene_info, read_storage
                read_storage = []
                current_gene_info = obj
                current_gene_info.db = gffutils_db
            else:
                raise ValueError("Read assignment file {} is corrupted!".format(save_file_name))
        except EOFError:
            break
    gc.enable()
    if current_gene_info and read_storage:
        yield current_gene_info, read_storage
def get_score_information(xml_path, feature_path, feature_stats, start_tempo, composer, vel_pair):
    xml_object, xml_notes = read_xml_to_notes(xml_path)
    beats = xml_object.get_beat_positions()
    measure_positions = xml_object.get_measure_positions()

    with open(feature_path, "rb") as f:
        u = cPickle.Unpickler(f)
        feature_dict = u.load()

    # TODO: not an array I think..?
    test_x = feature_dict['input_data']
    note_locations = feature_dict['note_location']['data']
    edges = feature_dict['graph']

    return test_x, xml_notes, xml_object, edges, note_locations
def load(self, path):
    """
    Load the parameters of a saved-off memory file

    Parameters
    ----------
    path: str
        The path where the saved-off file exists
    """
    restore_path = path + "/memory.info"
    if os.path.exists(restore_path):
        with open(restore_path, "rb") as file:
            # info = pickle.load(file)
            p = pickle.Unpickler(file)
            # p.fast = True
            info = p.load()
            p.memo.clear()
            self._storage = info["storage"]
            self._maxsize = info["maxsize"]
            self._next_idx = info["next_idx"]
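# Hedged counterpart sketch (not part of the source): a writer for the dict
# that load() above reads back. 'save_memory' and the 'buffer' argument are
# hypothetical names standing in for the owning replay-memory object.
import pickle

def save_memory(buffer, path):
    with open(path + "/memory.info", "wb") as file:
        pickle.dump({
            "storage": buffer._storage,
            "maxsize": buffer._maxsize,
            "next_idx": buffer._next_idx,
        }, file)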
def loadData(self, fname, label_vector_size=0):
    f = gzip.open(fname)
    u = pickle.Unpickler(f)
    data = []
    label = []
    try:
        while True:
            rec = u.load()
            data.append(rec[0])
            if label_vector_size > 0:
                lbl_vector = np.zeros(label_vector_size)
                lbl_vector[rec[1]] = 1
                label.append(lbl_vector)
            else:
                label.append(rec[1])
    except EOFError:
        pass
    return [np.array(data, dtype=rec[0].dtype), np.array(label)]
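# Small standalone illustration of the one-hot step inside loadData above: an
# integer class label k becomes a vector of length label_vector_size with a 1
# at index k. The sizes below are arbitrary example values.
import numpy as np

def one_hot(label, size):
    vec = np.zeros(size)
    vec[label] = 1
    return vec

# one_hot(2, 5)  ->  array([0., 0., 1., 0., 0.])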
def save_data_pair_dataset_by_piece():
    data_path = Path('/home/yoojin/data/emotionDataset/feature_for_analysis')
    with open(data_path.joinpath('pairdataset.dat'), 'rb') as f:
        u = cPickle.Unpickler(f)
        emotion_pair_data = u.load()

    e1_pairs = []
    e2_pairs = []
    e3_pairs = []
    e4_pairs = []
    e5_pairs = []
    data_pair_set_by_piece = []
    for pair_set in tqdm(emotion_pair_data.data_pair_set_by_piece):
        set_list = []
        for pair_data in pair_set:
            data = DataPair(pair_data).make_dict()
            if pair_data.emotion == 1:
                e1_pairs.append(data)
            elif pair_data.emotion == 2:
                e2_pairs.append(data)
            elif pair_data.emotion == 3:
                e3_pairs.append(data)
            elif pair_data.emotion == 4:
                e4_pairs.append(data)
            else:
                e5_pairs.append(data)
            set_list.append(data)
        data_pair_set_by_piece.append(set_list)

    with open(data_path.joinpath("data_pair_set_by_piece.dat"), "wb") as f:
        pickle.dump(data_pair_set_by_piece, f, protocol=2)
    with open(data_path.joinpath("e1_pairs.dat"), "wb") as f:
        pickle.dump(e1_pairs, f, protocol=2)
    with open(data_path.joinpath("e2_pairs.dat"), "wb") as f:
        pickle.dump(e2_pairs, f, protocol=2)
    with open(data_path.joinpath("e3_pairs.dat"), "wb") as f:
        pickle.dump(e3_pairs, f, protocol=2)
    with open(data_path.joinpath("e4_pairs.dat"), "wb") as f:
        pickle.dump(e4_pairs, f, protocol=2)
    with open(data_path.joinpath("e5_pairs.dat"), "wb") as f:
        pickle.dump(e5_pairs, f, protocol=2)
def main(label_path):
    print('[INFO] Loading saved clustering labels from \'' + label_path + '\'')
    with open(label_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    unique_labels = set(labels.values())
    print('[INFO] ' + str(len(unique_labels) - 1) + ' unique clusters and ' +
          str(list(labels.values()).count(-1)) + ' noise points found')
    print('[INFO] Press ESC to stop viewing a cluster, press any other key to continue')

    while True:
        # get user input
        label = input('[INFO] Input a number from ' + str(min(unique_labels)) +
                      ' to ' + str(max(unique_labels)) + ' to view a cluster: ')
        try:
            label = int(label)
        except ValueError:
            print('[ERROR] Expected an int')
            continue
        if label not in unique_labels:
            print('[ERROR] Input not in specified range')
            continue

        # view items in the cluster
        for im, l in labels.items():
            if l == label:
                img = cv2.imread('../r37/' + im, 0)
                cv2.imshow(im, img)
                k = cv2.waitKey(0) & 0xFF
                cv2.destroyAllWindows()
                # press ESC to exit
                if k == 27:
                    break
def load(self, path):
    """
    Load the parameters of a saved-off memory file

    Parameters
    ----------
    path: str
        The path where the saved-off file exists
    """
    restore_path = path + "/adaptive_memory.info"
    if os.path.exists(restore_path):
        with open(restore_path, "rb") as file:
            p = pickle.Unpickler(file)
            info = p.load()
            p.memo.clear()
            self._alpha = info["alpha"]
            self._it_sum = info["it_sum"]
            self._it_min = info["it_min"]
            self._max_priority = info["max_priority"]
            self._next_idx = info["next_idx"]
            self._storage = info["storage"]
            self._maxsize = info["maxsize"]
def load(self, path):
    """
    Load the parameters of a saved-off memory file

    Parameters
    ----------
    path: str
        The path where the saved-off file exists
    """
    restore_path = path + "/memory.info"
    if os.path.exists(restore_path):
        with open(restore_path, "rb") as file:
            # info = pickle.load(file)
            p = pickle.Unpickler(file)
            # p.fast = True
            info = p.load()
            p.memo.clear()
            self._storage = info["storage"]
            self._maxsize = info["maxsize"]
            self._next_idx = info["next_idx"]

        # sanity-check the restored transitions for NaN/inf values
        for obses_t, actions, rewards, obses_tp1, dones, rewards_decoms in self._storage:
            if np.sum(np.isnan(obses_t)) > 0 or np.sum(obses_t == float('inf')) > 0:
                print(obses_t)
                input()
            # note: `rewards == float('nan')` is always False, so use np.isnan
            if rewards == float('inf') or np.isnan(rewards):
                print(rewards)
                input()
            if np.sum(np.isnan(obses_tp1)) > 0 or np.sum(obses_tp1 == float('inf')) > 0:
                print(obses_tp1)
                input()
            if np.sum(np.isnan(rewards_decoms)) > 0 or np.sum(rewards_decoms == float('inf')) > 0:
                print(rewards_decoms)
                input()

        # self._storage = list(info["storage"][:30000])
        # self._maxsize = info["maxsize"]
        # self._next_idx = 30000
def main(feats_path, mcs, ms):
    print('[INFO] Preparing to cluster features from ' + feats_path)

    # load feature vectors
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        index = unpickler.load()
    index = {
        name: vector
        for name, vector in index.items() if vector is not None
    }

    # collect vectors into a 2d array
    dataset = list(index.values())
    dataset = np.asarray(dataset)

    # clusterer model
    print('[INFO] Using HDBSCAN to cluster features with mcs=' + str(mcs) +
          ', ms=' + str(ms))
    clusterer = hdbscan.HDBSCAN(min_cluster_size=mcs,
                                min_samples=ms,
                                metric='braycurtis',
                                algorithm='best',
                                core_dist_n_jobs=-1)

    # cluster
    clusterer.fit(dataset)

    # save labels
    base = path.basename(feats_path)
    name = path.splitext(base)[0]
    output = '../labels/hdbscan_' + name + '_mcs' + str(mcs) + '_ms' + str(ms) + '.pickle'
    print('[INFO] Saving predicted labels to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(dict(zip(index.keys(), clusterer.labels_)), handle, protocol=4)
def main(feats_path, n):
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    labels = {name: vector for name, vector in labels.items() if vector is not None}
    names = list(labels.keys())
    vectors = list(labels.values())

    print('[INFO] Sampling ' + str(n) + ' instances from ' + feats_path)
    random.seed(42)
    indices = random.sample(range(len(labels)), n)
    subset = {names[i]: vectors[i] for i in indices}

    # save subset
    base = path.basename(feats_path)
    name = path.splitext(base)[0]
    output = name + '_sub' + str(n) + '.pickle'
    print('[INFO] Saving subset to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(subset, handle)
def main():
    print('Working...')

    # similarity threshold
    threshold = 0.5

    # load feature vectors
    with open('features.pickle', 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        index = unpickler.load()

    while True:
        results = {}

        # get user input
        image_q = input('Input an image to compare: ')
        if image_q not in index:
            print('Query image not found, please try another.')
            continue
        feature_q = index[image_q]

        # calculate cosine similarities
        print('Calculating cosine similarities...')
        for image_n, feature_n in index.items():
            score = 1 - spatial.distance.cosine(feature_n, feature_q)
            if score > threshold:
                results[image_n] = score
        del results[image_q]
        results = [(image_n, results[image_n])
                   for image_n in sorted(results, key=results.get, reverse=True)]

        # present results
        print('Results:')
        for image_n, score in results:
            print('{0}\t{1}'.format(image_n, score))
def main():
    print('Working...')

    # load feature vectors
    with open('features.pickle', 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        index = unpickler.load()

    # reshape 3d vectors to 2d
    dataset = list(index.values())
    dataset = np.asarray(dataset)
    n_samples, n_x, n_y = dataset.shape
    dataset = dataset.reshape((n_samples, n_x * n_y))

    # train model
    model = DBSCAN(metric='cosine', n_jobs=-1).fit(dataset)

    # save model
    with open('dbscan_model.pickle', 'wb') as handle:
        pickle.dump(model, handle)
    print('DBSCAN model trained and saved to dbscan_model.pickle')
def load_pickle(file):
    with open(file, "rb") as f:
        data = pickle.Unpickler(f).load()
    return data
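# A possible companion writer to load_pickle above, sketched on the assumption
# that the same one-object-per-file convention is wanted; save_pickle is not
# in the source.
import pickle

def save_pickle(file, data):
    with open(file, "wb") as f:
        pickle.dump(data, f)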
def _load_file(self, file_name):
    with open(self.path.joinpath(file_name), 'rb') as f:
        u = cPickle.Unpickler(f)
        dataset = u.load()
    return dataset
def unpickle(name):
    with open(name, 'rb') as f:
        unpickler = cpickle.Unpickler(f)
        objects = unpickler.load()
    return objects
def main(feats_path, max_cluster_size):
    print('[INFO] Preparing to cluster features from ' + feats_path)

    # load feature vectors
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        index = unpickler.load()
    index = {
        name: vector
        for name, vector in index.items() if vector is not None
    }

    # labels
    labels = {}

    # start timer
    start = time.time()

    # cluster iteratively
    min_cluster_size = max_cluster_size
    while len(index) > 0 and min_cluster_size >= 2:
        print('[INFO] Clustering ' + str(len(index)) +
              ' points with min_cluster_size=' + str(min_cluster_size))
        clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size,
                                    min_samples=None,
                                    metric='braycurtis',
                                    algorithm='best',
                                    core_dist_n_jobs=-1)

        # collect vectors into a 2d array
        dataset = list(index.values())
        dataset = np.asarray(dataset)

        # fit clusterer
        clusterer.fit(dataset)

        # update labels (treat cluster 0 as noise)
        new_labels = clusterer.labels_
        current_max = max(labels.values()) if len(labels) > 0 else -1
        new_labels = [
            label + current_max if label > 0 else -1 for label in new_labels
        ]
        new_labels = dict(zip(index.keys(), new_labels))
        labels.update(new_labels)

        # get features of noise points to recluster
        index = {k: v for k, v in index.items() if new_labels[k] == -1}
        min_cluster_size -= 1

    # end timer and print
    end = time.time()
    print('[INFO] Clustering completed after ' + str(end - start) + ' seconds')

    # save labels to disk
    base = path.basename(feats_path)
    name = path.splitext(base)[0]
    output = '../labels/iterative_' + name + '_mcs' + str(max_cluster_size) + '_labels.pickle'
    print('[INFO] Saving labels to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(labels, handle, protocol=4)
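# Hedged inspection sketch: reload the labels file written above and summarize
# cluster sizes. 'summarize_labels' is a hypothetical helper; label_path stands
# for any pickle produced by the clustering scripts in this file.
import pickle
from collections import Counter

def summarize_labels(label_path):
    with open(label_path, 'rb') as handle:
        labels = pickle.Unpickler(handle).load()
    counts = Counter(labels.values())
    noise = counts.pop(-1, 0)   # -1 marks noise points
    print('[INFO] {} clusters, {} noise points'.format(len(counts), noise))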
def load_datafile(path, name):
    with open(path.joinpath(name), 'rb') as f:
        u = cPickle.Unpickler(f)
        data = u.load()
    return data
def get_data(filename):
    # fixed module name (was 'picles') and added a with-block to close the file
    with open('grantData_hw3/' + filename + '.pickle', 'rb') as f:
        return pickle.Unpickler(f).load()
features = data_set.get_average_by_perform(features)
data_set.save_features_as_csv(features, target_features, path='features_by_piecewise_average.csv')

features = data_set.features_to_list(target_features)
data_set.save_features_by_features_as_csv(features, target_features, path='note.csv')
'''

data_set = data_class.DataSet('pyScoreParser/chopin_cleaned/Haydn/', 'folder')
data_set.load_all_performances()
# score_extractor = feat_ext.ScoreExtractor(['composer_vec'])
for piece in data_set.pieces:
    piece.meta.composer = 'Haydn'
    # piece.score_features['composer_vec'] = score_extractor.get_composer_vec(piece)
data_set.extract_all_features()
data_set.save_dataset('HaydnTest.dat')

with open('HaydnTest.dat', "rb") as f:
    u = cPickle.Unpickler(f)
    data_set = u.load()

# perform_extractor = feat_ext.PerformExtractor(['beat_dynamics', 'measure_dynamics'])
# for piece in data_set.pieces:
#     for performance in piece.performances:
#         performance.perform_features = perform_extractor.extract_perform_features(piece, performance)
# data_set.save_dataset('HaydnTest.dat')

pair_data = dft.PairDataset(data_set)
pair_data.update_dataset_split_type()
pair_data.update_mean_stds_of_entire_dataset()
pair_data.save_features_for_virtuosoNet('HaydnTestFeature')