Python save 예제들, save_data.save Python 예제들

예제 #1

0

파일 보기

def calc_R(beta, save_data_for_one_cluster=False):
    """Run one simulation and return what its radius pre-function produced.

    ``Main`` is built on a lattice sized by the module-level ``L`` for the
    module-level number of ``frames``, with a single seed string and
    ``calc_radius_of_rotation`` registered as ``pre_function``.

    Args:
        beta: Forwarded to ``Main`` as ``beta``.
        save_data_for_one_cluster: When true, the result of this single run
            is also written through ``save_data.save``.

    Returns:
        The value ``Main`` stored in ``pre_func_res``.
    """
    seed_string = {'id': 1, 'x': L / 4, 'y': L / 2, 'vec': [0, 4]}
    main = Main(
        Lx=L,
        Ly=L,
        frames=frames,
        beta=beta,
        size=[3],
        plot=False,
        save_image=False,
        strings=[seed_string],
        pre_function=calc_radius_of_rotation,
    )
    radius_of_rotation = main.pre_func_res

    # Optionally persist the data of this one cluster.
    if save_data_for_one_cluster:
        prefix = "./results/data/radius/frames=%d_beta=%2.2f_" % (frames, beta)
        save_data.save(prefix,
                       frames=frames,
                       beta=beta,
                       L=L,
                       radius_of_rotation=radius_of_rotation)

    return radius_of_rotation

예제 #2

0

파일 보기

    def get_carlos_params(self):
        """Extract ``get_params`` features for every ``.wav`` file of every class.

        For each entry of ``self.classes`` the matching sub-directory of
        ``self.audio_path`` is scanned. The per-file results are written to
        ``<class>_features.json`` inside that directory, non-empty results
        are collected in ``self.features[<class index>]``, and the complete
        ``self.features`` mapping is finally written to ``features.json``.
        ``self.class_to_number`` / ``self.number_to_class`` are filled along
        the way and ``self.feat_amount`` is set to the shape of the first
        feature of class 0.
        """
        for class_idx, animal in enumerate(self.classes):
            current_path = path.join(self.audio_path, animal)
            sounds = os.listdir(current_path)
            self.features[class_idx] = []
            self.class_to_number[animal] = class_idx
            self.number_to_class[class_idx] = animal

            per_file_features = {}
            wav_names = [name for name in sounds if name.endswith('.wav')]
            for sound in wav_names:
                wav_path = path.join(current_path, sound)
                print("processing " + wav_path)
                feat_ = get_params(wav_path)
                print(wav_path + " processed")

                # The per-class JSON keeps every file, even an empty result;
                # only non-empty results join the in-memory feature list.
                per_file_features[sound] = feat_
                if len(feat_) != 0:
                    self.features[class_idx].append(feat_)

            json_path = path.join(current_path, animal + "_features.json")
            save(per_file_features, json_path)
            print(animal + " features has been json saved")

        first_feature = self.features[0][0]
        self.feat_amount = np.array(first_feature).shape
        save(self.features, "features.json")

예제 #3

0

파일 보기

파일: cutting_profile_run.py 프로젝트: ssh0/growing-string

def main(num_of_strings=30, beta=0., frames=1000, L=100, save_result=True,
         plot_result=True):
    """Run several cutting-profile simulations and pool their relative positions.

    Each run creates a ``CuttingProfile`` (with its own saving and plotting
    switched off), starts it, and merges ``runner.relative_positions[j]``
    for ``j`` in 0..5 into one array per ``j`` with ``np.vstack``.

    Args:
        num_of_strings: Number of independent runs.
        beta: Forwarded to ``CuttingProfile``.
        frames: Forwarded to ``CuttingProfile``.
        L: Forwarded to ``CuttingProfile``.
        save_result: When true, write the pooled data through ``sd.save``.
        plot_result: When true, call ``plot_hist`` on the pooled data.
    """
    params = {
        'beta': beta,
        'L': L,
        'frames': frames,
        # Per-run output is disabled; only the pooled result is saved/plotted.
        'save_result': False,
        'plot_raw_result': False,
        '_plot_dist_to_verify': False,
    }
    relative_positions = {}
    for _ in tqdm(range(num_of_strings)):
        runner = CuttingProfile(**params)
        runner.start()
        for j in range(6):
            # `in` replaces dict.has_key(), which does not exist in Python 3
            # and is equivalent on Python 2.
            if j not in runner.relative_positions:
                continue
            if j in relative_positions:
                relative_positions[j] = np.vstack(
                    (relative_positions[j], runner.relative_positions[j]))
            else:
                relative_positions[j] = runner.relative_positions[j]

    if save_result:
        # NOTE(review): `runner` is the last run's object, so this requires
        # num_of_strings >= 1 (unchanged from the original behaviour).
        sd.save("results/data/cutting_profile/" +
                "frames=%d_beta=%2.2f_" % (frames, beta),
                beta=beta, L=L, frames=frames,
                weight_const=runner.weight_const,
                num_of_strings=num_of_strings,
                relative_positions=relative_positions
                )

    if plot_result:
        # plot_all_points(relative_positions)
        plot_hist(relative_positions)

예제 #4

0

파일 보기

파일: cutting_profile.py 프로젝트: ssh0/growing-string

    def start(self):
        """Run the simulation, derive the cutting profiles, then save/plot on request.

        Stores the ``Main`` instance in ``self.main``, the result of
        ``get_cutting_profiles()`` in ``self.cutting_profiles`` and the result
        of ``get_relative_positions()`` in ``self.relative_positions``.
        Saving and the two plotting steps are controlled by
        ``self.save_result``, ``self.plot_raw_result`` and
        ``self._plot_dist_to_verify``.
        """
        seed_string = {
            'id': 1,
            'x': self.L/4,
            'y': self.L/2,
            'vec': [0, 4],
        }
        # The profiles are computed after the run (below) rather than through
        # Main's pre_function hook.
        self.main = Main(Lx=self.L, Ly=self.L,
                         size=[3],
                         plot=False, plot_surface=False,
                         frames=self.frames,
                         strings=[seed_string],
                         beta=self.beta,
                         weight_const=self.weight_const)

        self.cutting_profiles = self.get_cutting_profiles()
        self.relative_positions = self.get_relative_positions()

        if self.save_result:
            file_prefix = ("results/data/cutting_profile/" +
                           "frames=%d_beta=%2.2f_" % (self.frames, self.beta))
            sd.save(file_prefix,
                    beta=self.beta,
                    L=self.L,
                    frames=self.frames,
                    weight_const=self.weight_const,
                    cutting_profiles=self.cutting_profiles)

        if self.plot_raw_result:
            self.plot_result()

        if self._plot_dist_to_verify:
            self.plot_dist_to_verify()

예제 #5

0

파일 보기

파일: distances.py 프로젝트: ssh0/growing-string

def execute_simulation_for_one_beta(beta,
                                    num_of_strings,
                                    L,
                                    frames,
                                    num_of_pairs,
                                    plot=True,
                                    save_image=False,
                                    save_data=False):
    """Collect (distance, path length) samples for one beta and save/plot them.

    ``get_path_length_and_distances`` is called ``num_of_strings`` times; its
    two results are gathered and flattened into 1-D arrays.

    Args:
        beta: Simulation parameter; also used in file names and the title.
        num_of_strings: Number of calls to ``get_path_length_and_distances``.
        L: Forwarded to ``get_path_length_and_distances``.
        frames: Forwarded to ``get_path_length_and_distances``.
        num_of_pairs: Forwarded to ``get_path_length_and_distances``.
        plot: When true (and ``save_image`` is false), show the figure.
        save_image: When true, write the figure to a time-stamped PNG instead
            of showing it.
        save_data: When true, write the arrays through ``sd.save``.
    """
    # print() with a single pre-formatted argument behaves identically on
    # Python 2 and Python 3, unlike the print statement.
    print("beta = %2.2f, frames = %d" % (beta, frames))
    distance_list = []
    path_length = []
    for _ in tqdm(range(num_of_strings)):
        d, pl = get_path_length_and_distances(beta, num_of_strings, L, frames,
                                              num_of_pairs)
        distance_list.append(d)
        path_length.append(pl)

    distance_list = np.array(distance_list).flatten()
    path_length = np.array(path_length).flatten()

    if save_data:
        sd.save("results/data/distances/frames=%d_beta=%2.2f_" %
                (frames, beta),
                beta=beta,
                num_of_strings=num_of_strings,
                L=L,
                frames=frames,
                distance_list=distance_list,
                path_length=path_length)

    if plot or save_image:
        fig, ax = plt.subplots()

        # 2-D histogram (heatmap) of distance against path length.
        ax.hist2d(distance_list, path_length, bins=25)

        ax.set_xlabel('Distance')
        ax.set_ylabel('Path length')
        ax.set_title(
            'Path length and distances between two points in the cluster' +
            r'($\beta = %2.2f$)' % beta)

        if save_image:
            result_image_path = "results/img/distances/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()

예제 #6

0

파일 보기

파일: mass.py 프로젝트: ssh0/growing-string

def mass_for_beta_one(beta, frames_list, N_r=100, num_of_strings=100):
    """Measure the mass M(r) inside circles of radius r for one beta and save it.

    ``Main`` is run ``num_of_strings`` times up to ``max(frames_list)``
    frames with ``calc_mass_in_r`` as ``post_function``. The non-``None``
    results of each run are assigned to the entries of ``frames_list`` by
    position, stacked over all runs, sorted by ``r`` and written with
    ``save_data.save`` (one file prefix per entry of ``frames_list``).

    Args:
        beta: Forwarded to ``Main``; also part of the output file name.
        frames_list: Frame counts at which the mass is measured.
            NOTE(review): results are matched to this list by position, which
            presumably requires it to be ascending -- confirm.
        N_r: Number of log-spaced radii per measurement.
        num_of_strings: Total number of runs.
    """
    # The maximum is kept under its own name: the original reused `frames`
    # as a loop variable further down, which silently changed the frame
    # count passed to every subsequent Main() call.
    max_frames = np.max(frames_list)
    center_sample = int(np.min(frames_list) / 2)
    L = (max_frames + 1) * 2

    def calc_mass_in_r(self, i, s):
        """Return an (N_r, 2) array of [r, M(r)] rows, or None for other frames."""
        N = len(s.vec) + 1
        if N - 3 not in frames_list:
            return None

        # Index with a tuple: a list of index arrays is no longer treated as
        # a multi-dimensional index by current NumPy releases.
        pos = tuple(s.pos.T)
        x, y = self.lattice_X[pos], self.lattice_Y[pos]
        X, Y = np.average(x), np.average(y)
        dist_from_center = np.sqrt((x - X) ** 2 + (y - Y) ** 2)
        r = np.logspace(1, np.log2(max(dist_from_center)), num=N_r, base=2.)
        centers_index = sorted(random.sample(range(N), center_sample))

        # Distances from each sampled centre do not depend on the radius, so
        # they are computed once instead of once per radius.
        dist_from_samples = []
        for c in centers_index:
            index_x, index_y = s.pos[c]
            dist_from_samples.append(
                np.sqrt((x - self.lattice_X[index_x, index_y]) ** 2
                        + (y - self.lattice_Y[index_x, index_y]) ** 2))

        M = []
        for _r in r:
            counts = [np.count_nonzero(d < _r) for d in dist_from_samples]
            M.append(np.average(counts))
        return np.array([r, M]).T

    def run_once():
        """Run one simulation and return its non-None post-function results."""
        main = Main(Lx=L, Ly=L, plot=False,
                    frames=max_frames,
                    beta=beta,
                    strings=[{'id': 1, 'x': L/4, 'y': L/2, 'vec': [0, 4]}],
                    post_function=calc_mass_in_r)
        return np.array([m for m in main.post_func_res if m is not None])

    _M = run_once()
    Ms = {frames_list[i]: _M[i] for i in range(len(frames_list))}

    for _ in tqdm(range(num_of_strings - 1)):
        _M = run_once()
        for i, n_frames in enumerate(frames_list):
            Ms[n_frames] = np.vstack((Ms[n_frames], _M[i]))

    for n_frames in frames_list:
        r, M = Ms[n_frames].T
        sorted_index = np.argsort(r)
        r, M = r[sorted_index], M[sorted_index]
        save_data.save("./results/data/mass_in_r/beta=%2.2f_frames=%d_" % (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_r=N_r, beta=beta, L=L, frames=n_frames, r=r, M=M)

예제 #7

0

파일 보기

 def vectorize_text(self):
     """Replace each class's word list by a 0/1 vector over the dictionary.

     For every key of ``self.features`` a zero vector with one slot per
     entry of ``self.dictionary`` is created and the slot of every word
     found for that class is set to 1. The result replaces
     ``self.features`` and is written to ``words_vector.json`` via ``save``.
     """
     vocabulary_size = len(self.dictionary.keys())  # number of words
     vectors = {}
     for class_id in self.features.keys():
         presence = np.zeros(vocabulary_size)
         # A word repeated within the same sentence is not counted again:
         # the vector only records presence.
         for token in self.features[class_id]:
             slot = self.dictionary[token]
             presence[slot] = 1
         vectors[class_id] = presence
     self.features = vectors
     save(vectors, "words_vector.json")

예제 #8

0

파일 보기

def calc_ave_R(num_of_strings=100):
    """Average ``calc_R(beta)`` over several runs and save the mean.

    Uses the module-level ``frames``, ``beta`` and ``L``. The results of
    ``num_of_strings`` calls to ``calc_R`` are summed into an array of
    length ``frames``, divided by the number of runs and written through
    ``save_data.save``.

    Args:
        num_of_strings: Number of runs to average over.
    """
    total = np.zeros(frames)
    for _ in tqdm(range(num_of_strings)):
        total += calc_R(beta)
    R_ave = total / float(num_of_strings)

    file_name = "frames=%d_beta=%2.2f_sample=%d_" % (frames, beta,
                                                     num_of_strings)
    save_data.save("./results/data/radius/" + file_name,
                   frames=frames,
                   beta=beta,
                   L=L,
                   radius_of_rotation=R_ave)

예제 #9

0

파일 보기

def box_count(beta, frames_list, N_L=20, num_of_strings=100):
    """Run box counting for several strings and save the pooled counts.

    ``BoxCounting`` is run ``num_of_strings`` times up to
    ``max(frames_list)`` frames. The non-``None`` entries of
    ``bc.main.post_func_res`` are assigned to the entries of ``frames_list``
    by position, stacked over all runs, sorted by box size and written with
    ``save_data.save`` (one file prefix per entry of ``frames_list``).

    Args:
        beta: Forwarded to ``BoxCounting``; also part of the file name.
        frames_list: Frame counts at which boxes are counted.
        N_L: Forwarded to ``BoxCounting`` as ``N_L``.
        num_of_strings: Total number of runs.
    """
    # Kept under its own name: the original reused `frames` as a loop
    # variable below, which changed the frame count of every later run.
    max_frames = np.max(frames_list)

    def run_once(string_num):
        """Run one BoxCounting simulation; return it with its results."""
        # Single-argument print() works on both Python 2 and Python 3.
        print('string ({}/{})'.format(string_num, num_of_strings))
        bc = BoxCounting(
            frames=max_frames,
            beta=beta,
            frames_list=frames_list,
            N_L=N_L,
        )
        bc.start()
        counts = np.array([n for n in bc.main.post_func_res if n is not None])
        return bc, counts

    # The first run seeds the per-frame arrays; the remaining runs are
    # stacked onto them.
    bc, _N = run_once(1)
    Ns = {frames_list[i]: _N[i] for i in range(len(frames_list))}

    for string_num in range(2, num_of_strings + 1):
        bc, _N = run_once(string_num)
        for i, n_frames in enumerate(frames_list):
            Ns[n_frames] = np.vstack((Ns[n_frames], _N[i]))

    for n_frames in frames_list:
        Ls, N = Ns[n_frames].T
        sorted_index = np.argsort(Ls)
        Ls, N = Ls[sorted_index], N[sorted_index]
        save_data.save("./results/data/box_counting/2017-01-29/" +
                       "beta=%2.2f_frames=%d_" % (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_L=N_L,
                       beta=beta,
                       L=bc.L,  # lattice size reported by the last run
                       frames=n_frames,
                       Ls=Ls,
                       N=N)

예제 #10

0

파일 보기

파일: mass_in_r_run.py 프로젝트: ssh0/growing-string

def mass_in_r_for_one_beta(beta, num_of_strings, L, frames, plot=True,
                           optimize=False, save_image=False, save_data=False):
    """Average r and M(r) over several strings for one beta; save/plot them.

    ``get_mass_in_r_for_one_string`` is called ``num_of_strings`` times (each
    call receives the ``r`` returned by the previous one) and the returned
    ``r`` and ``M`` arrays are averaged element-wise over the runs.

    Args:
        beta: Simulation parameter; also used in file names.
        num_of_strings: Number of runs to average over.
        L: Forwarded to ``get_mass_in_r_for_one_string``.
        frames: Forwarded to ``get_mass_in_r_for_one_string``.
        plot: When true (and ``save_image`` is false), show the figure.
        optimize: When true, fit with ``Optimize_powerlaw`` (ignoring the last
            five points) and draw the fitted curve.
        save_image: When true, write the figure to a time-stamped PNG instead
            of showing it.
        save_data: When true, write the averaged arrays to disk.
    """
    # Single-argument print() works on both Python 2 and Python 3.
    print("beta = %2.2f" % beta)
    r = None
    rs = []
    Ms = []
    for _ in tqdm(range(num_of_strings)):
        r, M = get_mass_in_r_for_one_string(L, frames, beta, r)
        rs.append(r)
        Ms.append(M)

    r = np.average(np.array(rs), axis=0)
    M = np.average(np.array(Ms), axis=0)

    if save_data:
        # Inside this function `save_data` is the boolean flag, so the
        # original `save_data.save(...)` raised AttributeError. Import the
        # module under another name instead.
        # NOTE(review): assumes the module is importable as `save_data`, as
        # the original call implies -- confirm.
        import save_data as save_data_module
        save_data_module.save("results/data/mass_in_r/beta=%2.2f_" % beta,
                              num_of_strings=num_of_strings,
                              beta=beta, L=L, frames=frames, r=r, M=M)

    if plot or save_image:
        fig, ax = plt.subplots()
        ax.loglog(r, M)
        ax.set_xlabel('Radius $r$ from the center of gravity')
        ax.set_ylabel('Mass in a circle with radius $r$')
        ax.set_title('$r$ vs. $M(r)$')

        if optimize:
            # The last five points are left out of the fit.
            index_stop = len(r) - 5
            optimizer = Optimize_powerlaw(args=(r[:index_stop],
                                                M[:index_stop]),
                                          parameters=[0., 2.])
            result = optimizer.fitting()
            print("D = %f" % result['D'])
            ax.loglog(r[:index_stop], optimizer.fitted(r[:index_stop]), lw=2,
                      label='D = %f' % result['D'])
            ax.legend(loc='best')

        if save_image:
            result_image_path = "results/img/mass_in_r/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()

예제 #11

0

파일 보기

파일: distances.py 프로젝트: ssh0/growing-string

def execute_simulation_for_one_beta(beta, num_of_strings, L, frames,
                                    num_of_pairs, plot=True,
                                    save_image=False, save_data=False):
    """Gather distance / path-length samples for one beta, then save and plot.

    Calls ``get_path_length_and_distances`` once per string, flattens the
    collected results into two 1-D arrays, optionally stores them with
    ``sd.save`` and optionally draws a 2-D histogram of them.

    Args:
        beta: Simulation parameter; also used in file names and the title.
        num_of_strings: Number of calls to ``get_path_length_and_distances``.
        L: Forwarded to ``get_path_length_and_distances``.
        frames: Forwarded to ``get_path_length_and_distances``.
        num_of_pairs: Forwarded to ``get_path_length_and_distances``.
        plot: When true (and ``save_image`` is false), show the figure.
        save_image: When true, save the figure as a time-stamped PNG instead
            of showing it.
        save_data: When true, write the arrays through ``sd.save``.
    """
    # The print statement is a syntax error on Python 3; print() with one
    # pre-formatted string prints the same text on Python 2 and 3.
    print("beta = %2.2f, frames = %d" % (beta, frames))
    distance_list = []
    path_length = []
    for _ in tqdm(range(num_of_strings)):
        d, pl = get_path_length_and_distances(beta, num_of_strings, L, frames,
                                              num_of_pairs)
        distance_list.append(d)
        path_length.append(pl)

    distance_list = np.array(distance_list).flatten()
    path_length = np.array(path_length).flatten()

    if save_data:
        sd.save("results/data/distances/frames=%d_beta=%2.2f_" % (frames, beta),
                beta=beta, num_of_strings=num_of_strings,
                L=L, frames=frames, distance_list=distance_list,
                path_length=path_length)

    if plot or save_image:
        fig, ax = plt.subplots()

        # 2-D histogram (heatmap) of distance against path length.
        ax.hist2d(distance_list, path_length, bins=25)

        ax.set_xlabel('Distance')
        ax.set_ylabel('Path length')
        ax.set_title('Path length and distances between two points in the cluster'
                     + r'($\beta = %2.2f$)' % beta)

        if save_image:
            result_image_path = "results/img/distances/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()

예제 #12

0

파일 보기

파일: main.py 프로젝트: Sheventon/DataMiningEducation

def most_common_words(connection):
    """Count the words of ``itis_kfu.txt`` and store the 100 most frequent.

    The text is lower-cased and split on whitespace. Each of the 100 most
    common tokens is passed to ``save(connection, word, count)`` and printed.

    Args:
        connection: Passed through unchanged as the first argument of ``save``.
    """
    # The context manager closes the file even if reading raises.
    with open('itis_kfu.txt', encoding="utf-8") as file:
        text = file.read()

    stop_symbols = r'.,:\!/?*-_•–—0123456789&"'
    # NOTE(review): `in` on a string is a substring test, so only tokens that
    # occur verbatim inside stop_symbols (e.g. "-" or "12") are skipped; a
    # token such as "word," is still counted. Behaviour kept as is -- confirm
    # this is intended.
    word_counter = collections.Counter(
        word for word in text.lower().split() if word not in stop_symbols)

    n_print = 100
    print("\nOK. The {} most common words are as follows\n".format(n_print))
    for word, count in word_counter.most_common(n_print):
        save(connection, word, count)
        print(word, ": ", count)

예제 #13

0

파일 보기

    def post(self):
        """Store the submitted form data and answer with a JSON acknowledgement.

        Seven body arguments are read, ``data_string`` and
        ``sentences_string`` are decoded as JSON, and the assembled dict is
        handed to ``save``. The response body is ``{"message": "ok"}``.
        """
        # Arguments are fetched in a fixed order so that the first missing
        # one is the one reported.
        argument_names = ('data_string', 'sentences_string', 'svg_string',
                          'major_name', 'second_name', 'user_name',
                          'total_number')
        (data_string, sentences_string, svg_string, major_dim, second_dim,
         user_name, total_number) = [
             self.get_body_argument(name) for name in argument_names]

        payload = {
            'user_name': user_name,
            'total_number': total_number,
            'data': json.loads(data_string),
            'svg_string': svg_string,
            'sentences': json.loads(sentences_string),
            'major_dim': major_dim,
            'second_dim': second_dim,
        }
        save(payload)

        self.set_header('Content-Type', 'application/json; charset=UTF-8')
        reply = json.dumps({'message': 'ok'})
        self.write(reply)
        self.finish()

예제 #14

0

파일 보기

 def get_features(self, filter_args=None):
     """Compute the configured features for every ``.wav`` file of every class.

     For each entry of ``self.classes`` the matching sub-directory of
     ``self.audio_path`` is scanned. Every loadable ``.wav`` file is
     optionally high-pass filtered, each function named in ``self.funcs_``
     is applied through ``do_from_name`` and the results are concatenated
     into one feature list per file. Words produced by the ``"text"``
     function are also registered in ``self.dictionary``. Finally
     ``self.feat_amount`` is set to the shape of the first feature of
     class 0 and ``self.features`` is written to ``features.json``.

     Args:
         filter_args: Optional keyword arguments for ``highpass_filter``;
             when falsy, no filtering is applied.
     """
     for i, animal in enumerate(self.classes):
         current_path = path.join(self.audio_path, animal)
         sounds = os.listdir(current_path)
         self.features[i] = []
         self.class_to_number[animal] = i
         self.number_to_class[i] = animal
         for sound in sounds:
             if not sound.endswith('.wav'):
                 continue
             AUDIO_FILE = path.join(current_path, sound)
             try:
                 audio = MonoLoader(filename=AUDIO_FILE)()
             except Exception:
                 # Best effort: skip files that cannot be loaded. Unlike the
                 # former bare `except:`, this no longer swallows
                 # KeyboardInterrupt / SystemExit.
                 continue
             if filter_args:
                 audio = highpass_filter(audio, 44100, **filter_args)
             feat_ = []
             for f in self.funcs_:
                 aux = do_from_name(f, audio, AUDIO_FILE)
                 if f == "text":
                     # -1 is returned when no text is available for the file.
                     if aux == -1:
                         continue
                     aux = aux.split()
                     for w in aux:
                         # New words get the next free dictionary index.
                         if w not in self.dictionary:
                             self.dictionary[w] = len(
                                 self.dictionary.keys())
                 feat_.extend(aux)
             if len(feat_) == 0:
                 continue
             self.features[i].append(feat_)
     self.feat_amount = np.array(self.features[0][0]).shape
     save(self.features, "features.json")

예제 #15

0

파일 보기

if __name__ == '__main__':
    current_time = time.strftime("%y%m%d_%H%M%S")

    L = 2000
    frames = 1000
    num_of_strings = 30

    betas = [0., 5., 10., 15., 20.]
    Rs = []  # average R per beta, in the order of `betas`
    for beta in betas:

        # One R value per independent FilledKagome run at this beta.
        R = []
        for s in range(num_of_strings):
            filled_kagome = FilledKagome(beta=beta, L=L, frames=frames)
            R.append(filled_kagome.R)
        Rs.append(np.average(R))

        save_data.save("results/data/filled_kagome_radius/beta=%2.2f_" % beta,
                       beta=beta, num_of_strings=num_of_strings,
                       L=L, frames=frames, R=R)

    fig, ax = plt.subplots()

    # The original call, `ax.plot(, path_length, bins=25)`, was a syntax
    # error and referred to a name that is never defined here. Plot the
    # averages computed above against beta instead.
    ax.plot(betas, Rs)

    # NOTE(review): the labels and title below appear to be copied from a
    # distance/path-length script and do not describe this plot; the title
    # also only shows the last beta. Left unchanged -- confirm and reword.
    ax.set_xlabel('Distance')
    ax.set_ylabel('Path length')
    ax.set_title('Path length and distances between two points in the cluster'
                 + r'($\beta = %2.2f$)' % beta)

예제 #16

0

파일 보기

    # NOTE(review): `betas`, `num_of_strings` and `start_time` are read below
    # but set outside this fragment; the commented lines show earlier choices
    # for `betas`.
    # betas = [float(i) for i in range(11)]
    # betas = [20.]
    frames = 1000
    L = (frames + 1) * 2
    num_of_pairs = 100

    fig, ax = plt.subplots()

    for beta in betas:
        # Single-argument print() works on both Python 2 and Python 3.
        print("beta = %2.2f" % beta)
        Lp, Cs = get_correlation(beta, num_of_strings, L, frames, num_of_pairs)
        ax.plot(Lp, Cs, '.', label=r'$\beta = %2.2f$' % beta)

        # save the data
        save_data.save("results/data/correlation/beta=%2.2f_" % beta,
                       num_of_strings=num_of_strings,
                       beta=beta, L=L, frames=frames, Lp=Lp, Cs=Cs)

    ax.set_xlabel('Path length')
    ax.set_ylabel('Correlation of the vectors')
    ax.set_title('Correlation of the vectors')
    ax.legend(loc='best')

    # One image for all betas, tagged with the number of strings and the
    # start time.
    result_image_path = "results/img/correlation/strings=%d" % num_of_strings
    result_image_path += "_" + start_time
    result_image_path += ".png"
    plt.savefig(result_image_path)
    plt.close()
    print("[saved] " + result_image_path)

    # plt.show()

예제 #17

0

파일 보기

def mass_for_beta_one(beta, frames_list, N_r=100, num_of_strings=100):
    """Measure the mass M(r) inside circles of radius r for one beta and save it.

    ``Main`` is run ``num_of_strings`` times up to ``max(frames_list)``
    frames with ``calc_mass_in_r`` as ``post_function``. The non-``None``
    results of each run are assigned to the entries of ``frames_list`` by
    position, stacked over all runs, sorted by ``r`` and written with
    ``save_data.save`` (one file prefix per entry of ``frames_list``).

    Args:
        beta: Forwarded to ``Main``; also part of the output file name.
        frames_list: Frame counts at which the mass is measured.
            NOTE(review): results are matched to this list by position, which
            presumably requires it to be ascending -- confirm.
        N_r: Number of log-spaced radii per measurement.
        num_of_strings: Total number of runs.
    """
    # The maximum is kept under its own name: the original reused `frames`
    # as a loop variable further down, which silently changed the frame
    # count passed to every subsequent Main() call.
    max_frames = np.max(frames_list)
    center_sample = int(np.min(frames_list) / 2)
    L = (max_frames + 1) * 2

    def calc_mass_in_r(self, i, s):
        """Return an (N_r, 2) array of [r, M(r)] rows, or None for other frames."""
        N = len(s.vec) + 1
        if N - 3 not in frames_list:
            return None

        # Index with a tuple: a list of index arrays is no longer treated as
        # a multi-dimensional index by current NumPy releases.
        pos = tuple(s.pos.T)
        x, y = self.lattice_X[pos], self.lattice_Y[pos]
        X, Y = np.average(x), np.average(y)
        dist_from_center = np.sqrt((x - X)**2 + (y - Y)**2)
        r = np.logspace(1, np.log2(max(dist_from_center)), num=N_r, base=2.)
        centers_index = sorted(random.sample(range(N), center_sample))

        # Distances from each sampled centre do not depend on the radius, so
        # they are computed once instead of once per radius.
        dist_from_samples = []
        for c in centers_index:
            index_x, index_y = s.pos[c]
            dist_from_samples.append(
                np.sqrt((x - self.lattice_X[index_x, index_y])**2 +
                        (y - self.lattice_Y[index_x, index_y])**2))

        M = []
        for _r in r:
            counts = [np.count_nonzero(d < _r) for d in dist_from_samples]
            M.append(np.average(counts))
        return np.array([r, M]).T

    def run_once():
        """Run one simulation and return its non-None post-function results."""
        main = Main(Lx=L,
                    Ly=L,
                    plot=False,
                    frames=max_frames,
                    beta=beta,
                    strings=[{
                        'id': 1,
                        'x': L / 4,
                        'y': L / 2,
                        'vec': [0, 4]
                    }],
                    post_function=calc_mass_in_r)
        return np.array([m for m in main.post_func_res if m is not None])

    _M = run_once()
    Ms = {frames_list[i]: _M[i] for i in range(len(frames_list))}

    for _ in tqdm(range(num_of_strings - 1)):
        _M = run_once()
        for i, n_frames in enumerate(frames_list):
            Ms[n_frames] = np.vstack((Ms[n_frames], _M[i]))

    for n_frames in frames_list:
        r, M = Ms[n_frames].T
        sorted_index = np.argsort(r)
        r, M = r[sorted_index], M[sorted_index]
        save_data.save("./results/data/mass_in_r/beta=%2.2f_frames=%d_" %
                       (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_r=N_r,
                       beta=beta,
                       L=L,
                       frames=n_frames,
                       r=r,
                       M=M)

예제 #18

0

파일 보기

import time, datetime, pythonping, json, save_data
# Ping logger: every cycle appends one timestamped round-trip time for
# 'sz.de' to the JSON object stored in data.txt.
# `count` starts at the number of entries already in the file.
with open("data.txt", "r") as source:
    count = len(json.load(source))

while True:
    count += 1

    # The file is re-read on every cycle before the new entry is added.
    with open("data.txt", "r") as source:
        data = json.load(source)

    # The ping runs first; the timestamp key is taken afterwards.
    reply = pythonping.ping('sz.de', size=256, count=1)
    rtt = reply.rtt_avg_ms
    data[str(datetime.datetime.now())] = rtt

    with open("data.txt", "w") as sink:
        json.dump(data, sink)

    time.sleep(4.9)
    print(count)

    if count <= 10000:
        continue
    # Past 10000 cycles, save_data.save() is called on every cycle.
    save_data.save()
    time.sleep(3)

예제 #19

0

파일 보기

파일: diecutting_hexagonal_run.py 프로젝트: ssh0/growing-string

def eval_simulation_for_one_beta(beta, num_of_strings=30):
    """Run the die-cutting evaluation for one beta and save the statistics.

    ``eval_simulation_for_each_string`` is called ``num_of_strings`` times.
    For every key of the module-level ``result_set`` the per-run values are
    grouped by cut size, and whichever of ``num_of_sub_clusters``,
    ``size_dist_of_sub_clusters`` and ``size_dist_ave_of_sub_clusters`` is
    present is reduced over the runs. Everything is written through
    ``save_data.save``.

    Args:
        beta: Simulation parameter; also part of the output file name.
        num_of_strings: Number of runs.
    """
    import numpy.ma as ma

    frames = 1000
    params = {
        'L': (frames + 2) * 2,
        'frames': frames,
        'beta': beta,
        'plot': False
    }

    # izip_longest on Python 2, zip_longest on Python 3.
    zip_longest = (getattr(itertools, 'izip_longest', None)
                   or itertools.zip_longest)

    def masked_average(arr):
        """Mean of `arr`, ignoring the -1 entries used as padding."""
        return ma.array(arr, mask=np.array(arr) == -1).mean()

    def to_padded_array(rows):
        """Copy ragged `rows` into a 2-D array, zero-filled on the right."""
        S = np.zeros((len(rows), max(map(len, rows))))
        for i, row in enumerate(rows):
            for j, num in enumerate(row):
                S[i][j] = num
        return S

    # d[result key][cut size] -> list of values, one per run that had it.
    d = {k: {} for k in result_set.keys()}
    for _ in tqdm(range(num_of_strings)):
        _Ls, _res = eval_simulation_for_each_string(params)
        for k in result_set.keys():
            for i, l in enumerate(_Ls):
                d[k].setdefault(l, []).append(_res[k][i])

    # Default so that the save call below still works when none of the three
    # keys is present (the original raised NameError in that case).
    Ls = []

    # `in` replaces dict.has_key(), which does not exist in Python 3.
    if 'num_of_sub_clusters' in d:
        # # With the approach below, samples in which the cut size L does not
        # # occur are simply ignored.
        # mean = [(k, np.average(v)) for k, v in d.items()]
        # Average as if the value were 0 for samples in which the cut size L
        # does not occur.
        mean = [(k, np.sum(v) / float(num_of_strings))
                for k, v in d['num_of_sub_clusters'].items()]
        Ls, N_sub = np.array(sorted(mean)).T
    else:
        N_sub = []

    if 'size_dist_of_sub_clusters' in d:
        size_dist = {}
        for k, v in d['size_dist_of_sub_clusters'].items():
            # NOTE(review): an earlier variant used fillvalue=0; confirm that
            # padding shorter runs with 1 is intended.
            # list() keeps the Python 2 result (a list) on Python 3 as well.
            size_dist[k] = list(map(sum, zip_longest(*v, fillvalue=1)))
        Ls = sorted(size_dist.keys())
        size_dist = to_padded_array([size_dist[k] for k in Ls])
    else:
        size_dist = []

    if 'size_dist_ave_of_sub_clusters' in d:
        size_dist_ave = {}
        for k, v in d['size_dist_ave_of_sub_clusters'].items():
            # Shorter runs are padded with -1, which masked_average ignores.
            num_when_L = zip_longest(*v, fillvalue=-1)
            size_dist_ave[k] = list(map(masked_average, num_when_L))
        Ls = sorted(size_dist_ave.keys())
        size_dist_ave = to_padded_array([size_dist_ave[k] for k in Ls])
    else:
        size_dist_ave = []

    save_data.save("../results/data/diecutting/beta=%2.2f_" % beta,
                   beta=beta, num_of_strings=num_of_strings,
                   L=params['L'], frames=params['frames'],
                   Ls=Ls, N_sub=N_sub, size_dist=size_dist,
                   size_dist_ave=size_dist_ave)

예제 #20

0

파일 보기

파일: read_word.py 프로젝트: chmaojian/text_match

import docx
import save_data
import os

# Raw strings: '\d' and '\s' are invalid escape sequences in normal string
# literals (a warning today, an error in future Python versions). The path
# values themselves are unchanged.
path = r'.\data_all'
save_path = r'.\save_data'
files = os.listdir(path)

if not os.path.exists(save_path):
    os.makedirs(save_path)

save_data.delete(save_path)

for file in files:
    source = os.path.join(path, file)
    # Test the entry inside `path`; the original tested the bare name against
    # the current working directory, so sub-directories were never skipped.
    if os.path.isdir(source):
        continue
    doc = docx.Document(source)
    # One CSV per document, named after the .docx file.
    file_new = file.replace('.docx', '.csv')
    for para in doc.paragraphs:
        data = para.text
        # Only paragraphs longer than 20 characters are matched and saved.
        if len(data) > 20:
            r1, r2, r3 = save_data.match(data)
            save_data.save(r1, r2, r3, os.path.join(save_path, file_new))