Example no. 1
0
    def fit(self, dataset, dirty_train):
        index_train_path = utils.get_dir(dataset, 'raw', 'idx_train.csv')
        index_test_path = utils.get_dir(dataset, 'raw', 'idx_test.csv')
        dirty_train_path = utils.get_dir(dataset, 'raw', 'dirty_train.csv')
        dirty_test_path = utils.get_dir(dataset, 'raw', 'dirty_test.csv')

        index_train = pd.read_csv(index_train_path).values.reshape(-1)
        index_test = pd.read_csv(index_test_path).values.reshape(-1)
        ind_path = utils.get_dir(dataset, 'raw', 'AutoER.csv')

        autoer_result = pd.read_csv(ind_path).values.reshape(-1)

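        # map the AutoER output onto the train and test splits using the saved row indices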
        ind_train = autoer_result[index_train]
        ind_test = autoer_result[index_test]

        dirty_train = pd.read_csv(dirty_train_path)
        dirty_test = pd.read_csv(dirty_test_path)

        train_mv = dirty_train.isnull().values.any(axis=1)
        test_mv = dirty_test.isnull().values.any(axis=1)

        ind_train = ind_train[train_mv == False]
        ind_test = ind_test[test_mv == False]

        ind_train = pd.DataFrame(ind_train.reshape(-1, 1), columns=["label"])
        ind_test = pd.DataFrame(ind_test.reshape(-1, 1), columns=["label"])

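        # flag rows whose label has already appeared earlier as duplicates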
        self.ind_train = ind_train.duplicated(keep="first").values
        self.ind_test = ind_test.duplicated(keep="first").values
Example no. 2
0
def main():
    if request.method == 'POST':
        # Get the name of the uploaded images
        uploaded_files = request.files.getlist('image[]')
        scene_id = str(uuid.uuid4())
        scene_dir = get_dir(DATA_DIR, scene_id)
        upload_dir = get_dir(scene_dir, 'upload')
        result_dir = get_dir(scene_dir, 'result')
        for fi, file in enumerate(uploaded_files):
            # Save the file
            if file and allowed_file(file.filename):
                _, file_extension = osp.splitext(file.filename)
                view_id = str(fi)
                file.save(osp.join(upload_dir, view_id + cfg.EXT))

        # Schedule a task to backend
        task = osp.join(TASK_DIR, scene_id)
        open(task, 'w').write('')
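        # poll until the task file disappears (the backend is expected to remove it when done)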
        while osp.exists(task):
            time.sleep(cfg.DELAY)

        filenames = os.listdir(result_dir)
        return render_template('result.html',
                               scene_id=scene_id,
                               filenames=filenames)

    return render_template('main.html')
Example no. 3
0
def init(dataset, test_ratio=0.3, seed=1, max_size=None):
    """ Initialize dataset: raw -> dirty -> dirty_train, dirty_test
        
        Args:
            dataset (dict): dataset dict in config.py
            max_size (int): maximum limit of dataset size
            test_ratio: ratio of test set
            seed: seed used to split dataset
    """
    print("Initialize dataset {}".format(dataset['data_dir']))

    # load raw data
    raw_path = utils.get_dir(dataset, 'raw', 'raw.csv')
    raw = pd.read_csv(raw_path)

    # delete missing labels or all missing values
    if 'missing_values' not in dataset['error_types']:
        dirty = delete_missing_values(raw)
    else:
        dirty = delete_missing_labels(raw, dataset['label'])

    # split dataset
    train, test, idx_train, idx_test = split(dirty, test_ratio, seed, max_size)

    # save train / test
    save_path_pfx = utils.get_dir(dataset, 'raw', 'dirty')
    utils.save_dfs(train, test, save_path_pfx)

    # save the version (seed) of dataset
    utils.save_version(save_path_pfx, seed)

    # save index
    save_path_pfx = utils.get_dir(dataset, 'raw', 'idx')
    utils.save_dfs(idx_train, idx_test, save_path_pfx)
Example no. 4
0
def preprocess(dataset,
               error_type,
               train_file,
               normalize=True,
               down_sample_seed=1):
    """Load and preprocess data

    Args:
        dataset (dict): dataset dict in config
        error_type (string): error type
        train_file (string): prefix of file of training set
        normalize (bool): whether to standardize the data
        down_sample_seed: seed for down sampling
    """
    # check train and test version are consistent
    check_version(dataset, error_type, train_file)

    # get path of train file and test files
    train_path = utils.get_dir(dataset, error_type, train_file + "_train.csv")
    test_files = utils.get_test_files(error_type, train_file)
    test_path_list = [
        utils.get_dir(dataset, error_type, test_file + "_test.csv")
        for test_file in test_files
    ]

    # load data
    X_train, y_train, X_test_list, y_test_list = load_data(
        dataset, train_path, test_path_list)

    ## preprocess data
    # drop irrelevant features
    if "drop_variables" in dataset.keys():
        drop_columns = dataset['drop_variables']
        drop_variables(X_train, X_test_list, drop_columns)

    # down sample if imbalanced
    if "class_imbalance" in dataset.keys() and dataset["class_imbalance"]:
        X_train, y_train = down_sample(X_train, y_train, down_sample_seed)

    # encode label
    if dataset['ml_task'] == 'classification':
        y_train, y_test_list = encode_cat_label(y_train, y_test_list)

    # text embedding
    if "text_variables" in dataset.keys():
        text_columns = dataset["text_variables"]
        X_train, X_test_list = encode_text_features(X_train, X_test_list,
                                                    y_train, text_columns)

    # encode categorical features
    X_train, X_test_list = encode_cat_features(X_train, X_test_list)

    # normalize data
    if normalize:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test_list = [scaler.transform(X_test) for X_test in X_test_list]

    return X_train, y_train, X_test_list, y_test_list, test_files
Example no. 5
0
 def fit(self, dataset, dirty_train):
     index_train_path = utils.get_dir(dataset, 'raw', 'idx_train.csv')
     index_test_path = utils.get_dir(dataset, 'raw', 'idx_test.csv')
     index_train = pd.read_csv(index_train_path).values.reshape(-1)
     index_test = pd.read_csv(index_test_path).values.reshape(-1)
     clean_path = utils.get_dir(dataset, 'raw', 'mislabel_clean_raw.csv')
     clean = utils.load_df(dataset, clean_path)
     self.clean_train = clean.loc[index_train, :]
     self.clean_test = clean.loc[index_test, :]
Example no. 6
0
def check_version(dataset, error_type, train_file):
    """Check whether train and test are of the same version"""
    train_path_pfx = utils.get_dir(dataset, error_type, train_file)
    train_version = utils.get_version(train_path_pfx)
    test_files = utils.get_test_files(error_type, train_file)
    for test_file in test_files:
        test_path_pfx = utils.get_dir(dataset, error_type, test_file)
        test_version = utils.get_version(test_path_pfx)
        assert (train_version == test_version)
Example no. 7
0
    def __init__(self, save_dir):
        self.save_dir = utils.get_dir(save_dir)

        self.model_save_dir = utils.get_dir(
            os.path.join(self.save_dir, 'models'))
        self.summary_save_dir = utils.get_dir(
            os.path.join(self.save_dir, 'summaries'))
        self.image_save_dir = utils.get_dir(
            os.path.join(self.save_dir, 'images'))
        self.log_save_dir = utils.get_dir(os.path.join(self.save_dir, 'logs'))
Example no. 8
0
    def fit(self, dataset, df):
        clean_raw_path = utils.get_dir(dataset, 'raw',
                                       'Holoclean_mv_clean.csv')
        clean_raw = pd.read_csv(clean_raw_path)

        index_train_path = utils.get_dir(dataset, 'raw', 'idx_train.csv')
        index_test_path = utils.get_dir(dataset, 'raw', 'idx_test.csv')
        index_train = pd.read_csv(index_train_path).values.reshape(-1)
        index_test = pd.read_csv(index_test_path).values.reshape(-1)

        self.clean_train = clean_raw.iloc[index_train, :]
        self.clean_test = clean_raw.iloc[index_test, :]
Example no. 9
0
def set_save_name(name):
    """
    Edits all constants dependent on SAVE_NAME.

    @param name: The new save name.
    """
    global SAVE_NAME, MODEL_SAVE_DIR, SUMMARY_SAVE_DIR, IMG_SAVE_DIR

    SAVE_NAME = name
    MODEL_SAVE_DIR = utils.get_dir(os.path.join(SAVE_DIR, SAVE_NAME, 'Models'))
    SUMMARY_SAVE_DIR = utils.get_dir(
        os.path.join(SAVE_DIR, SAVE_NAME, 'Summaries'))
    IMG_SAVE_DIR = utils.get_dir(os.path.join(SAVE_DIR, SAVE_NAME, 'Images'))
    print('Set save dir to {}'.format(os.path.join(SAVE_DIR, SAVE_NAME)))
Example no. 10
0
def get_img(img, size=None):
    path = os.path.join(utils.get_dir(__file__), "img", img)
    img = PIL.Image.open(path)
    if size and (size < img.size):
        log.debug("Resizing %s; requested size: %s, orig image size: %s", img.filename, size, img.size)
        img = img.resize(size, PIL.Image.LANCZOS)
    return PIL.ImageTk.PhotoImage(img)
Example no. 11
0
    def out_as_csv(self, filename):
        print(len(self.gbid_dic))
        newdir = utils.get_dir('out')
        out_name = os.path.join(newdir, filename)

        days_list = list(self.days.keys())
        days_list.sort()
        csv = csv_output.CSVOutPut()
        for i, day in enumerate(days_list):
            day_dict = self.days[day]
            dur_dict = {}
            csv.set(0, i * 2, day)
            csv.set(0, i * 2 + 1, day)
            for uk, lo in day_dict.items():
                dur_dict[lo.get_duration()] = dur_dict.get(
                    lo.get_duration(), 0) + 1

            dur_list = list(dur_dict.keys())
            dur_list.sort()
            for j in range(121):
                csv.set(j + 1, i * 2, j)
                csv.set(j + 1, i * 2 + 1, dur_dict.get(j, 0))

            # for j, dur in enumerate(dur_list):
            #     count = dur_dict[dur]
            #     csv.set(j + 1, i * 2, dur)
            #     csv.set(j + 1, i * 2 + 1, count)

        csv.output(out_name)
Example no. 12
0
def main(argv=None):

    save_dir = utils.get_dir(FLAGS.save_dir)

    file_paths, labels = load_pathnlabel(FLAGS.annofile_path, abs_path=FLAGS.abs_path)
    assert len(file_paths) == len(labels)

    wrtie_videoframes_to_tfrecord(file_paths, labels, save_dir, frame_limit=FLAGS.min_len)
Example no. 13
0
 def fit(self, dataset, dirty_train):
     dirty_raw_path = utils.get_dir(dataset, 'raw', 'raw.csv')
     clean_raw_path = utils.get_dir(dataset, 'raw',
                                    'inconsistency_clean_raw.csv')
     if not os.path.exists(clean_raw_path):
         print(
             "Must provide clean version of raw data for cleaning inconsistency"
         )
         sys.exit(1)
     dirty_raw = utils.load_df(dataset, dirty_raw_path)
     clean_raw = utils.load_df(dataset, clean_raw_path)
     N, m = dirty_raw.shape
     dirty_raw = dirty_raw.values
     clean_raw = clean_raw.values
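     # positions where the dirty raw values differ from the provided clean version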
     mask = (dirty_raw != clean_raw)
     dirty = dirty_raw[mask]
     clean = clean_raw[mask]
     self.incon_dict = dict(zip(dirty, clean))
Example no. 14
0
def clean_error(dataset, error):
    """ Clean one error in the dataset.
    
    Args:
        dataset (dict): dataset dict in dataset.py
        error (string): error type
    """
    # create saving folder
    save_dir = utils.get_dir(dataset, error, create_folder=True)

    # load dirty data
    dirty_path_pfx = utils.get_dir(dataset, 'raw', 'dirty')
    dirty_train, dirty_test, version = utils.load_dfs(dataset,
                                                      dirty_path_pfx,
                                                      return_version=True)

    # delete missing values if error type is not missing values
    if error != 'missing_values':
        dirty_train = dirty_train.dropna().reset_index(drop=True)
        dirty_test = dirty_test.dropna().reset_index(drop=True)

    # save dirty data
    dirty_path_pfx = os.path.join(save_dir, 'dirty')
    utils.save_dfs(dirty_train, dirty_test, dirty_path_pfx, version)

    # clean the error in the dataset with various cleaning methods
    error_type = utils.get_error(error)
    for clean_method, cleaner in error_type['clean_methods'].items():
        print(
            "        - Clean the error with method '{}'".format(clean_method))
        # fit on dirty train and clean both train and test
        cleaner.fit(dataset, dirty_train)
        clean_train, ind_train, clean_test, ind_test = cleaner.clean(
            dirty_train, dirty_test)

        # save clean train and test data
        clean_path_pfx = os.path.join(save_dir, clean_method)
        utils.save_dfs(clean_train, clean_test, clean_path_pfx, version)

        # save indicator
        ind_path_pfx = os.path.join(save_dir,
                                    'indicator_{}'.format(clean_method))
        utils.save_dfs(ind_train, ind_test, ind_path_pfx)
Example no. 15
0
def wrtie_videoframes_to_tfrecord(file_paths, labels, save_dir, frame_limit=None):

    assert len(file_paths) == len(labels), 'Files and labels mismatch'
    print('Total # of files {:d}'.format(len(file_paths)))

    save_dir = utils.get_dir(save_dir)

    # filter away
    if frame_limit:
        filtered_pairs = [(f, l) for f, l in zip(file_paths, labels)
                          if len(glob.glob(os.path.join(f,'*.{:s}'.format(FLAGS.image_format)))) >= frame_limit]

        file_paths, labels = zip(*filtered_pairs)
        assert len(file_paths) == len(labels), 'Checking filtering'

    n_files = len(file_paths)
    print('# of valid videos: {:d}'.format(n_files))

    for i, s_label, s_filename in zip(range(n_files), labels, file_paths):

        filename_stem = s_filename.split(os.sep)[-1]
        print('Processing {:d} | {:d} \t {:s}'.format(i, n_files, filename_stem))
        file_list = glob.glob(os.path.join(s_filename, '*.{:s}'.format(FLAGS.image_format)))
        n_images = len(file_list)
        images = []

        tf_save_name = os.path.join(save_dir, '{:s}.{:s}'.format(filename_stem, FLAGS.tf_format))
        writer = tf.python_io.TFRecordWriter(tf_save_name)

        for single_filename in file_list:
            img = imread(single_filename, mode='RGB')
            images.append(img)

        np_images = np.array(images).astype(np.uint8)
        seq_shape = np_images.shape
        seq_d = seq_shape[0]    # depth, length of seq
        assert n_images==seq_d
        seq_h = seq_shape[1]    # height
        seq_w = seq_shape[2]    # width
        seq_c = seq_shape[3]    # channels
        image_raw = np_images.tostring()
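        # pack the frame count, spatial dims, channels, label and raw frame bytes into a single Example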
        example = tf.train.Example(features=tf.train.Features(feature={
            'd': _int64_feature(seq_d),
            'h': _int64_feature(seq_h),
            'w': _int64_feature(seq_w),
            'c': _int64_feature(seq_c),
            'label': _int64_feature(int(s_label)),
            'image': _bytes_feature(image_raw),
            'filename': _bytes_feature(filename_stem)}))

        writer.write(example.SerializeToString())
        writer.close()

    print('Done')
Example no. 16
0
    def filter_sys_log(tag_name):
        new_dir = utils.get_dir('tmp')
        basename = os.path.basename(const.SYS_LOG_NAME)
        fw_name = os.path.join(new_dir, '{}.{}.log'.format(basename, tag_name))
        fw = utils.utf8_open(fw_name, 'w')
        with utils.utf8_open(const.SYS_LOG_NAME, encoding='utf-8') as fr:
            for line in fr:
                if tag_name in line:
                    fw.write(line)

        fw.close()
        return fw_name
Example no. 17
0
    def fit(self, dataset, df):
        clean_raw_path = utils.get_dir(dataset, 'raw',
                                       'Holoclean_outlier_clean.csv')
        index_train_path = utils.get_dir(dataset, 'raw', 'idx_train.csv')
        index_test_path = utils.get_dir(dataset, 'raw', 'idx_test.csv')

        index_train = pd.read_csv(index_train_path).values.reshape(-1)
        index_test = pd.read_csv(index_test_path).values.reshape(-1)
        clean_raw = pd.read_csv(clean_raw_path)

        if 'missing_values' in dataset['error_types']:
            dirty_train = pd.read_csv(
                utils.get_dir(dataset, 'raw', 'dirty_train.csv'))
            dirty_test = pd.read_csv(
                utils.get_dir(dataset, 'raw', 'dirty_test.csv'))
            raw = pd.read_csv(utils.get_dir(dataset, 'raw', 'raw.csv'))
            raw_mv_rows = raw.isnull().values.any(axis=1)
            train_mv_rows = dirty_train.isnull().values.any(axis=1)
            test_mv_rows = dirty_test.isnull().values.any(axis=1)

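            # remap original row indices to their positions after rows with missing values are dropped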
            old_index = np.arange(len(raw))[raw_mv_rows == False]
            new_index = np.arange(len(raw) - sum(raw_mv_rows))
            index_map = {}

            for o, n in zip(old_index, new_index):
                index_map[o] = n

            index_train_no_mv = index_train[train_mv_rows == False]
            index_test_no_mv = index_test[test_mv_rows == False]

            index_train = [index_map[i] for i in index_train_no_mv]
            index_test = [index_map[i] for i in index_test_no_mv]

        self.clean_train = clean_raw.iloc[index_train, :]
        self.clean_test = clean_raw.iloc[index_test, :]
Example no. 18
0
 def __init__(self):
     self.captcha = Captcha()
     self.text, image = self.captcha.get_captcha_text_and_image()
     self.IMAGE_HEIGHT = 60
     self.IMAGE_WIDTH = 160
     self.MAX_CAPTCHA = len(self.text)
     self.CHAR_SET_LEN = self.captcha.get_len_char_set()
     self.X = tf.placeholder(tf.float32,
                             [None, self.IMAGE_HEIGHT * self.IMAGE_WIDTH])
     self.Y = tf.placeholder(tf.float32,
                             [None, self.MAX_CAPTCHA * self.CHAR_SET_LEN])
     self.keep_prob = tf.placeholder(tf.float32)
     self.model_path = is_dir(get_dir() + "ckpt" + os.sep)
     self.output = self.creat_captcha_cnn()
Example no. 19
0
    def filter_guild_train():
        tmp_dir = utils.get_dir('tmp')
        fw_name = os.path.join(tmp_dir, 'guild_train_tlog.log')
        if os.path.exists(fw_name):
            return fw_name

        fw = utils.utf8_open(fw_name, 'w')
        for line in utils.get_origin_line_stream():
            if 'guild train upgrade' in line and line.startswith(
                    'LOG_GUILD_CONTRIBUTION'):
                fw.write(line)

        fw.close()
        return fw_name
Example no. 20
0
def reset(dataset):
    """ Reset dataset"""
    # delete folders for each error
    for error in dataset['error_types']:
        utils.remove(utils.get_dir(dataset, error))

    # delete dirty_train and dirty_test in raw folder
    utils.remove(utils.get_dir(dataset, 'raw', 'dirty_train.csv'))
    utils.remove(utils.get_dir(dataset, 'raw', 'dirty_test.csv'))
    utils.remove(utils.get_dir(dataset, 'raw', 'dirty.csv'))
    utils.remove(utils.get_dir(dataset, 'raw', 'idx_train.csv'))
    utils.remove(utils.get_dir(dataset, 'raw', 'idx_test.csv'))
    utils.remove(utils.get_dir(dataset, 'raw', 'version.json'))
Example no. 21
0
 def __init__(self, color, seed, x, y, size=36):
     self.seed = seed
     self.generator = random.Random(seed)
     self.color = color
     self.size = size
     self.sprites = sprite.RenderUpdates()
     self.sprite = sprite.Sprite()
     self.dirName = get_dir()
     self._path = self.dirName + "ghost_" + self.color + ".png"
     img = pygame.image.load(self._path)
     self.sprite.image = pygame.transform.scale(img, (self.size, self.size))
     self.sprites.add(self.sprite)
     self.position = (x, y)
     self.startposition = self.position
     self.reset()
Example no. 22
0
    def out_as_csv(self, filename):
        out_dir = utils.get_dir('out')
        out_name = os.path.join(out_dir, filename)

        csv = csv_output.CSVOutPut()
        index = 0
        for day, day_dict in self.uk_day_dict.items():
            sum_score = sum(day_dict.values())
            csv.set(0, index, day)

            csv.set(1, index, sum_score / len(day_dict))
            csv.set(2, index, sum_score)
            csv.set(3, index, len(day_dict))
            index += 1

        csv.output(out_name)
Example no. 23
0
    def filter_login_log():
        dirname = utils.get_dir('tmp')
        fw_name = utils.get_out_name('tmp', 'log_in_and_out.log')
        if os.path.exists(fw_name):
            return fw_name

        fw = utils.utf8_open(fw_name, 'w', encoding='utf-8')
        for line in utils.get_origin_line_stream():
            if not (line.startswith('SecLogin')
                    or line.startswith('SecLogout')):
                continue

            fw.write(line)

        fw.close()
        return fw_name
Example no. 24
0
    def out_as_csv(self, csv_name):
        dirname = utils.get_dir('out')
        full_csv_name = os.path.join(dirname, csv_name)
        days_list = list(self.days.keys())
        days_list.sort()
        with utils.utf8_open(full_csv_name, 'w') as fw:
            days_str = ','.join(map(str, days_list))
            fw.write(days_str + '\n')

            avatar_count = ','.join(
                map(lambda day: str(len(self.days[day])), days_list))
            fw.write(avatar_count + '\n')

            def sum_times(day_dict):
                return sum(map(len, day_dict.values()))

            times = ','.join(
                map(lambda day: str(sum_times(self.days[day])), days_list))
            fw.write(times + '\n')
Example no. 25
0
def download(json_data,
             save_dir,
             max_len=100000,
             min_len=-1,
             video_format='mp4',
             video_resolution='360p',
             dstfile=None):
    save_dir = utils.get_dir(save_dir)
    count = 0
    nfiles = len(json_data)
    testids = []
    testlabels = []

    with open(dstfile, 'w') as f:

        for i, datum in enumerate(json_data):

            if datum['duration'] > min_len and datum['duration'] < max_len:

                videolink = youtube_link + datum['id']

                try:
                    yt = YouTube(videolink)
                    video = yt.get(video_format, video_resolution)
                    video.filename = datum['id']
                    video.download(save_dir)
                    count += 1
                    print('{:d} : {:d} -- {:s}, len: {:d}'.format(
                        i, nfiles, datum['id'], int(datum['duration'])))
                    videoname = os.path.join(
                        save_dir, '{:s}.{:s}'.format(datum['id'],
                                                     video_format))
                    videolabel = datum['label487']
                    f.write('{:s} {:s}\r\n'.format(
                        videoname, ' '.join(map(str, videolabel))))
                    f.flush()
                except:
                    print('{:s} NOT valid'.format(datum['id']))
                    continue

    print('Done downloading, {:d} videos downloaded, information saved to {:s}'.format(
        count, dstfile))
Example no. 26
0
def download(save_dir, rewrite=False):
    save_dir = utils.get_dir(save_dir)
    if rewrite:
        utils.clear_dir(save_dir)

    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(save_dir, filename)
    if not os.path.isfile(filepath):

        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename, float(count * block_size) /
                              float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

    tarfile.open(filepath, 'r:gz').extractall(save_dir)
Example no. 27
0
def inject(dataset):
    """ Inject mislabels
        Args:
            dataset (dict): dataset dict in config
    """
    # create saving folder
    major_save_dir = utils.makedirs(
        [config.data_dir, dataset["data_dir"] + "_major", 'raw'])
    minor_save_dir = utils.makedirs(
        [config.data_dir, dataset["data_dir"] + "_minor", 'raw'])
    uniform_save_dir = utils.makedirs(
        [config.data_dir, dataset["data_dir"] + "_uniform", 'raw'])

    # load clean data
    clean_path = utils.get_dir(dataset, 'raw', 'raw.csv')
    clean = utils.load_df(dataset, clean_path)
    clean = clean.dropna().reset_index(drop=True)

    major_clean_path = os.path.join(major_save_dir, 'mislabel_clean_raw.csv')
    minor_clean_path = os.path.join(minor_save_dir, 'mislabel_clean_raw.csv')
    uniform_clean_path = os.path.join(uniform_save_dir,
                                      'mislabel_clean_raw.csv')
    clean.to_csv(major_clean_path, index=False)
    clean.to_csv(minor_clean_path, index=False)
    clean.to_csv(uniform_clean_path, index=False)

    label = dataset['label']

    # uniform flip
    uniform = uniform_class_noise(clean, label)
    # pairwise flip
    major, minor = pairwise_class_noise(clean, label)

    major_raw_path = os.path.join(major_save_dir, 'raw.csv')
    minor_raw_path = os.path.join(minor_save_dir, 'raw.csv')
    uniform_raw_path = os.path.join(uniform_save_dir, 'raw.csv')

    major.to_csv(major_raw_path, index=False)
    minor.to_csv(minor_raw_path, index=False)
    uniform.to_csv(uniform_raw_path, index=False)
Example no. 28
0
 def __init__(self, args):
     self._make_dataset(args)
     self._make_model(args)
     tc.manual_seed(args.seed)
     if args.cuda and tc.cuda.is_available():
         tc.cuda.manual_seed_all(args.seed)
         if tc.cuda.device_count() > 1:
             self.batch_size = args.batch_size * tc.cuda.device_count()
             self.model = DataParallel(self.model)
         else:
             self.batch_size = args.batch_size
             self.model = self.model.cuda()
     else:
         self.batch_size = args.batch_size
     self._make_optimizer(args)
     self._make_loss(args)
     self._make_metric(args)
     self.num_training_samples = args.num_training_samples
     self.tag = args.tag or 'default'
     self.dump_dir = get_dir(args.dump_dir)
     self.train_logger = get_logger('train.{}.{}'.format(
         self.__class__.__name__, self.tag))
Example no. 29
0
def main(argv):
    FLAGS = gflags.FLAGS
    gflags.DEFINE_string('srcfile', 'json/sports1m_test.json',
                         'json file to read from [json/sports1m_test.json]')
    gflags.DEFINE_string('savedir', 'tmp',
                         'destination directory to save files [tmp]')
    gflags.DEFINE_string('video_format', 'mp4', 'video format [mp4]')
    gflags.DEFINE_string('video_resolution', '360p',
                         'video resolution 360p, 720p and so on [360p]')
    gflags.DEFINE_integer('min_len', 4, 'Minimal length (4 seconds)')
    gflags.DEFINE_integer('max_len', 30, 'Maximal length (30 seconds)')
    gflags.DEFINE_string(
        'dstfile', None,
        'destination json file [json/sports1m_test_(duration).json]')
    gflags.DEFINE_boolean('rewrite', True,
                          'rewrite everything in saved dir [True]')
    argv = FLAGS(argv)

    srcfile = FLAGS.srcfile
    dstfile = FLAGS.dstfile

    savedir = os.path.abspath(FLAGS.savedir)
    savedir = utils.get_dir(savedir)

    if FLAGS.rewrite:
        utils.clear_dir(savedir)

    raw_json_data = read_json(srcfile)
    if not dstfile:
        srcname, srcext = os.path.splitext(srcfile)
        dstfile = '{:s}-{:02d}-{:02d}{:s}'.format(srcname, FLAGS.min_len,
                                                  FLAGS.max_len, '.txt')
    download(raw_json_data,
             save_dir=savedir,
             dstfile=dstfile,
             max_len=FLAGS.max_len,
             min_len=FLAGS.min_len)
Example no. 30
0
    def out_as_csv(self, filename):
        csv = csv_output.CSVOutPut()
        out_name = os.path.join(utils.get_dir('out'), filename)
        day_list = list(self.days.keys())
        day_list.sort()

        for index, day in enumerate(day_list):
            csv.set(0, index * 2, str(day))
            csv.set(0, index * 2 + 1, str(day))
            day_dict = self.days[day]
            levelDict = {}
            for unique_key, lo in day_dict.items():
                levelDict.setdefault(lo.level, [])
                levelDict[lo.level].append(lo)

            levelList = list(levelDict.keys())
            levelList.sort()

            for _index, level in enumerate(levelList):
                num = len(levelDict[level])
                csv.set(_index + 1, index * 2, str(level))
                csv.set(_index + 1, index * 2 + 1, str(num))

        csv.output(out_name)
Example no. 31
0
 def __init__(self, buddy, startx, starty, color = 'green', size=36):
     self.buddy = buddy
     self.nick = buddy.props.nick
     self.score = 0
     self.color = color
     self.size = size
     self.startposition = (startx, starty)
     self.hidden = False
     self.bonusplayers = None
     self.opensprites = sprite.RenderUpdates()
     self.closedsprites = sprite.RenderUpdates()
     self.dirName = get_dir()
     self.open = sprite.Sprite()
     self.closed = sprite.Sprite()
     self._path = self.dirName + "pacman-" + self.color
     img = pygame.image.load(self._path + "-open-right.png")
     self.open.image = pygame.transform.scale(img, (self.size, self.size))
     img = pygame.image.load(self._path + "-closed.png")
     self.closed.image = pygame.transform.scale(img, (self.size, self.size))
     self.opensprites.add(self.open)
     self.closedsprites.add(self.closed)
     self.mouthClosed = 1
     self.supertime = 0
     self.reset()
Example no. 32
0
 def get_config_dir(self):
     return utils.get_dir(self.config_file)
Example no. 33
0
    def _do_read_config(self, config_file, pommanipext):
        """Reads config for a single job defined by section."""
        parser = InterpolationConfigParser()
        dataset = parser.read(config_file)
        if config_file not in dataset:
            raise IOError("Config file %s not found." % config_file)
        if parser.has_option('common', 'include'):
            include = parser.get('common', 'include')
            if include != "":
                sections_ = self.read_and_load(include)
                for section_ in sections_:
                    if parser.has_section(section_):
                        raise DuplicateSectionError("The config section [%s] already exists in %s and in the included %s cfg file" % (section_, config_file, re.split("\\s+", include.strip())[1]))
                parser._sections.update(sections_)

        pom_manipulator_config = {}
        common_section = {}
        package_configs = {}

        if pommanipext and pommanipext != '' and pommanipext != 'None': #TODO ref: remove none check, it is passed over cmd line in jenkins build
            parse_pom_manipulator_ext(pom_manipulator_config, parser, pommanipext)

        if not parser.has_section('common'):
            logging.error('Mandatory common section missing from configuration file.')
            raise NoSectionError('Mandatory common section missing from configuration file.')
        common_section['tag'] = parser.get('common', 'tag')
        common_section['target'] = parser.get('common', 'target')
        common_section['jobprefix'] = parser.get('common', 'jobprefix')
        common_section['jobciprefix'] = parser.get('common', 'jobciprefix')
        common_section['jobjdk'] = parser.get('common', 'jobjdk')
        if parser.has_option('common', 'mvnver'):
            common_section['mvnver'] = parser.get('common', 'mvnver')
        if parser.has_option('common', 'skiptests'):
            common_section['skiptests'] = parser.get('common', 'skiptests')
        if parser.has_option('common', 'base'):
            common_section['base'] = parser.get('common', 'base')
        if parser.has_option('common', 'citemplate'):
            common_section['citemplate'] = parser.get('common', 'citemplate')
        if parser.has_option('common', 'jenkinstemplate'):
            common_section['jenkinstemplate'] = parser.get('common', 'jenkinstemplate')
        if parser.has_option('common', 'product_name'):
            common_section['product_name'] = parser.get('common', 'product_name')

        if parser.has_option('common', 'include'):
            common_section['include'] = parser.get('common', 'include')

        common_section['jobfailureemail'] = parser.get('common', 'jobfailureemail')

        config_dir = utils.get_dir(config_file)

        #Jira
        if parser.has_option('common', 'shared_config') and parser.get('common', 'shared_config') != "":
            parse_shared_config(common_section, config_dir, parser)

        common_section['jobtimeout'] = parser.getint('common', 'jobtimeout')

        common_section['options'] = {}
        # If the configuration file has global properties insert these into the common properties map.
        # These may be overridden later by particular properties.
        if parser.has_option('common', 'globalproperties'):
            common_section['options']['properties'] = dict(x.strip().split('=') for x in parser.get('common', 'globalproperties').replace(",\n", ",").split(','))
        else:
            # Always ensure properties has a valid dictionary so code below doesn't need multiple checks.
            common_section['options']['properties'] = {}
        # The same for global profiles
        if parser.has_option('common', 'globalprofiles'):
            common_section['options']['profiles'] = [x.strip() for x in parser.get('common', 'globalprofiles').split(',')]
        else:
            # Always ensure profiles has a valid list so code below doesn't need multiple checks.
            common_section['options']['profiles'] = []

        if os.path.dirname(config_file):
            config_path = os.path.dirname(config_file)
        else:
            config_path = os.getcwd()
        logging.info("Configuration file is %s and path %s", os.path.basename(config_file), config_path)

        for section in parser.sections():
            config_type = self.read_config_type(parser, section)
            if section == 'common' or config_type == "bom-builder-meta":
                logging.debug ('Skipping section due to meta-type %s', section)
                continue

            self._do_read_section(config_path, os.path.basename(config_file), package_configs, parser, section)

        return (common_section, package_configs, pom_manipulator_config)
Example no. 34
0
import threading
from gi.repository import GObject, Gtk
import subprocess
import os
import time
import utils
import pbpulse

steps = ["Préparation","Installation de LibreOffice","Installation de Firefox"]

CMD=os.path.join(utils.get_dir(__file__),"prog.sh")

class ThreadScript(threading.Thread):
    def __init__(self,progress,info,text,action=None):
        threading.Thread.__init__(self)
        self._running = False
        self.progress = progress
        self.info = info
        self.text = text
        self.action = action 
                
    def run(self):
        self._running = True
        self.cmd = subprocess.Popen(CMD,stdout=subprocess.PIPE,universal_newlines=True)
        while self._running:
            for line in self.cmd.stdout:
                line = line[:-1]
                try:
                    self.progress(float(line))
                except ValueError:
                    if line[0] == "#":
Example no. 35
0
def get_log_dir():
    logdir = os.path.join(get_dir(__file__), 'log')
    makedirs(logdir)
    return logdir
Example no. 36
0
def set_icon(window, icon):
    this_dir = utils.get_dir(__file__)
    window.wm_iconbitmap(os.path.join(this_dir, "img", icon))