def run(self, context):
    """Create this step's directory inside the package being built.

    Relative directory names are created under the package project dir;
    absolute names are re-rooted under package_root via strip_root.
    """
    if not self.directory_name.isabsolute():
        directory = Path(Path(context['package_project_dir']), self.directory_name)
    else:
        directory = Path(context['package_root'], Path(strip_root(self.directory_name)))
    directory.mkdir(parents=True, mode=self.mode)
def sort_episode(series_name, episode, torrent_path):
    """Copy an episode's video file from a finished torrent into
    SERIES_DIR/<series_name> as '<series> - <episode><ext>'."""
    # Ensure the series directory exists (unipath: mkdir(True) == parents)
    series_dir = Path(SERIES_DIR, series_name)
    series_dir.mkdir(True)
    if torrent_path.isdir():
        # Collect every video file in the torrent dir, flatten the
        # per-extension lists, and drop sample clips.
        files = [torrent_path.listdir('*.' + ext) for ext in VIDEO_FILES]
        files = [f for sublist in files for f in sublist]
        files = remove_samples(files)
        logging.debug('List of files: {}'.format(files))
        if len(files) == 0:
            logging.critical('No video file found in series directory!')
            sys.exit(1)
        # NOTE(review): only the first video file is used — multi-part
        # episodes are not handled (compare sort_movie's CD handling).
        src_file = files[0]
        dst_file = Path(series_dir, series_name + ' - ' + episode + files[0].ext)
    else:
        if torrent_path.ext.replace('.', '') not in VIDEO_FILES:
            logging.warning('Unknown video file extention: {}'.format(
                torrent_path.ext))
        src_file = torrent_path
        dst_file = Path(series_dir, series_name + ' - ' + episode +
                        torrent_path.ext)
    logging.info('Copying single file to destination: {}'.format(
        dst_file))
    copy_file(src_file, dst_file)
def append(self, key):
    """Add a public key for this list's user and commit it to the keydir.

    `key` may be raw key material or a path to a key file; it is
    normalised to bytes before hashing and storage.
    """
    key_path = Path(str(key))
    if key_path.isfile():
        with open(str(key_path)) as f:
            key = f.read()
    if not isinstance(key, bytes):
        key = key.encode('utf-8')
    if key in self:
        return  # already tracked in this list
    # Keys live in keydir/<user>/<md5-of-key-body>/<user>.pub;
    # split()[1] is the base64 body of an OpenSSH public-key line.
    directory = Path(self.user.path, 'keydir', self.user.name,
                     hashlib.md5(key.strip().split()[1]).hexdigest())
    directory.mkdir(parents=True)
    key_file = Path(directory, "%s.pub" % self.user.name)
    if key_file.exists() and key_file.read_file() == key:
        return  # identical key already on disk; nothing to commit
    key_file.write_file(key, mode='wb')
    self.user.git.commit(['keydir'],
                         'Added new key for user %s' % self.user.name)
    super(ListKeys, self).append(key)
def create(dataset, work_dir):
    """Preprocess every page of `dataset` and segment it into per-character
    images under work_dir, printing collected size statistics."""
    # Find all the pages in the dataset
    img_dir = Path(Path.cwd().ancestor(1), 'data/hwr_data/pages', dataset)
    ann_dir = Path(Path.cwd().ancestor(1), 'data/charannotations')
    images = img_dir.listdir('*.jpg')
    annotations = ann_dir.listdir(dataset + '*.words')
    files = merge(images, annotations)  # pair each page with its annotation
    # Create character segmentations
    stats = {}
    for f in files:
        # Preprocess
        logging.info("Preprocessing %s", str(f[0]))
        pagesPathFolder = Path(work_dir, 'pages')
        pagesPathFolder.mkdir()
        pagePath = Path(pagesPathFolder, f[0].stem + '.ppm')
        img = cv2.imread(f[0], cv2.IMREAD_GRAYSCALE)
        img = preprocess(img)
        cv2.imwrite(pagePath, img)
        # Segment
        segmentPathFolder = Path(work_dir, 'segments')
        segmentPathFolder.mkdir()
        e = ET.parse(f[1]).getroot()
        logging.info("Segmenting %s", str(f[0]))
        segment(img, e, segmentPathFolder, stats)
    print_statistics(stats, dataset)
def __init__(self, subject):
    """Set up an experiment session for `subject` (a dict of subject vars)."""
    self.session = subject.copy()
    self.session['date'] = data.getDateStr()
    self.session['computer'] = socket.gethostname()
    self.trials = Trials(**subject)
    self.load_sounds('stimuli/sounds')
    # Auditory feedback keyed by correctness (0 = incorrect, 1 = correct).
    self.feedback = {}
    self.feedback['audio'] = {
        0: sound.Sound('stimuli/feedback/incorrect.wav'),
        1: sound.Sound('stimuli/feedback/correct.wav'),
    }
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input; consider yaml.safe_load.
    self.texts = yaml.load(open('texts.yaml'))
    # Gamepad buttons / keyboard keys mapped to yes(1)/no(0) responses.
    self.device = ResponseDevice(gamepad={0: 1, 3: 0},
                                 keyboard={'y': 1, 'n': 0})
    data_dir = Path(DATA_FILE.format(**subject)).parent
    if not data_dir.isdir():
        data_dir.mkdir()
    # Unbuffered ('w', 0) so each trial row hits disk immediately
    # (Python 2 semantics; buffering=0 is binary-only on Python 3).
    self.data_file = open(DATA_FILE.format(**subject), 'w', 0)
    self.write_trial()  # write header
def savejson(d):
    """Dump record `d` to <exportpath>/<status>/<year>/<month>/<id>.json."""
    status = d['status']
    # Collapse every 'Pendent*' variant into a single bucket.
    if status.startswith('Pendent'):
        status = 'Pendent'
    target_dir = Path(exportpath, status, d['year'], d['month'])
    target_dir.mkdir(parents=True)
    target_file = target_dir.child('{}.json'.format(d['id']))
    with open(target_file, 'w') as handle:
        json.dump(d, handle, sort_keys=True, indent=4,
                  separators=(',', ': '))
def get_backups_folder(self) -> Path:
    """Get the path to the local backup folder.

    Automatically creates the folder if it does not exist.

    Returns:
        Path: the backup folder configured in settings.BACKUP['folder'].
    """
    # FIX: the docstring wrongly claimed a "List of Paths" is returned;
    # the function returns a single Path.
    backup_folder = Path(settings.BACKUP['folder'])
    if not backup_folder.exists():
        backup_folder.mkdir()
    return backup_folder
def get(project=None):
    """Get the data from different experiments.

    Warning! Experiment directories are expected in a particular location
    outside of this (words-in-transition) directory. Options are:

        telephone-app
        acoustic-similarity
        learning-sound-names
    """
    if project is None or project == 'telephone-app':
        app_dir = Path('../telephone-app')
        snapshot_dir = Path(app_dir, 'words-in-transition')
        # NOTE(review): src_dir is not defined in this function —
        # presumably a module-level constant; verify.
        if src_dir.exists():
            src_dir.rmtree()
        copytree(snapshot_dir, src_dir)
    if project is None or project == 'acoustic-similarity':
        # src
        proj_dir = Path('../acoustic-similarity/data')
        judgments = Path(proj_dir, 'judgments')
        # dst
        acoustic_similarity_dir = Path(data_raw, 'acoustic-similarity')
        if not acoustic_similarity_dir.isdir():
            acoustic_similarity_dir.mkdir()
        # copy the csvs in the root proj data dir
        for csv in proj_dir.listdir('*.csv'):
            csv.copy(Path(acoustic_similarity_dir, csv.name))
        # concat and save judgments files
        judgments_csv = Path(acoustic_similarity_dir, 'judgments.csv')
        # `judgments` is rebound here: dir Path -> list of DataFrames.
        judgments = [pd.read_csv(x) for x in judgments.listdir('*.csv')]
        if judgments:
            (pd.concat(judgments, ignore_index=True).to_csv(judgments_csv,
                                                            index=False))
    if project is None or project == 'learning-sound-names':
        src = Path('../learning-sound-names/data')
        dst = Path(data_raw, 'learning_sound_names.csv')
        data = pd.concat([pd.read_csv(x) for x in src.listdir('LSN*.csv')])
        data['is_correct'] = data.is_correct.astype(int)
        data.to_csv(dst, index=False)
        # also get subject info and questionnaire data
        to_get = ['questionnaire_v1', 'subject_info']
        for x in to_get:
            src_file = Path(src, '{}.csv'.format(x))
            dst_file = Path(data_raw,
                            'learning_sound_names_{}.csv'.format(x))
            run('cp {} {}'.format(src_file, dst_file))
def copy_files(context):
    """Mirror the deployment files tree into the package project dir.

    Directories are recreated under package_project_dir; regular files
    are copied with `cp -R`.
    """
    # copy files from deployments
    if context["deployment_files_dir"].exists() and context["deployment_files_dir"].isdir():
        for i in context["deployment_files_dir"].walk():
            rel_path = get_relative_path(i, context["deployment_files_dir"])
            target = Path(context["package_project_dir"], rel_path)
            if i.isdir():
                # BUG FIX: this is a module-level function with no `self`;
                # the original passed mode=self.mode, raising NameError.
                target.mkdir(parents=True)
            elif i.isfile():
                local("cp -R %(i)s %(target)s" % locals())
def get(project=None):
    """Get the data from different experiments.

    Warning! Experiment directories are expected in a particular location
    outside of this (words-in-transition) directory. Options are:

        telephone-app
        acoustic-similarity
        learning-sound-names
    """
    if project is None or project == 'telephone-app':
        app_dir = Path('../telephone-app')
        snapshot_dir = Path(app_dir, 'words-in-transition')
        # NOTE(review): src_dir is not defined in this function —
        # presumably a module-level constant; verify.
        if src_dir.exists():
            src_dir.rmtree()
        copytree(snapshot_dir, src_dir)
    if project is None or project == 'acoustic-similarity':
        # src
        proj_dir = Path('../acoustic-similarity/data')
        judgments = Path(proj_dir, 'judgments')
        # dst
        acoustic_similarity_dir = Path(data_raw, 'acoustic-similarity')
        if not acoustic_similarity_dir.isdir():
            acoustic_similarity_dir.mkdir()
        # copy the csvs in the root proj data dir
        for csv in proj_dir.listdir('*.csv'):
            csv.copy(Path(acoustic_similarity_dir, csv.name))
        # concat and save judgments files
        judgments_csv = Path(acoustic_similarity_dir, 'judgments.csv')
        # `judgments` is rebound here: dir Path -> list of DataFrames.
        judgments = [pd.read_csv(x) for x in judgments.listdir('*.csv')]
        if judgments:
            (pd.concat(judgments, ignore_index=True)
                .to_csv(judgments_csv, index=False))
    if project is None or project == 'learning-sound-names':
        src = Path('../learning-sound-names/data')
        dst = Path(data_raw, 'learning_sound_names.csv')
        data = pd.concat([pd.read_csv(x) for x in src.listdir('LSN*.csv')])
        data['is_correct'] = data.is_correct.astype(int)
        data.to_csv(dst, index=False)
        # also get subject info and questionnaire data
        to_get = ['questionnaire_v1', 'subject_info']
        for x in to_get:
            src_file = Path(src, '{}.csv'.format(x))
            dst_file = Path(data_raw,
                            'learning_sound_names_{}.csv'.format(x))
            run('cp {} {}'.format(src_file, dst_file))
def dict2dir(dir, dic, mode="w"):
    """Materialise the nested dict `dic` as a directory tree rooted at `dir`.

    Keys are file/directory names; string values become file contents and
    dict values become subdirectories (handled recursively).

    Args:
        dir: root directory; created if missing.
        dic: mapping of name -> content string or nested dict.
        mode: open mode for the files ('w' text, 'wb' binary, ...).
    """
    dir = Path(dir)
    if not dir.exists():
        dir.mkdir()
    for filename, content in dic.items():
        p = Path(dir, filename)
        if isinstance(content, dict):
            # BUG FIX: propagate `mode` into the recursion — the original
            # silently reset nested trees to the default 'w'.
            dict2dir(p, content, mode)
            continue
        # Context manager so the handle is closed even if write() raises.
        with open(p, mode) as f:
            f.write(content)
def process(appname):
    """Scaffold static/, templates/ and urls.py for the given Django app."""
    app_path = Path(appname)
    if not app_path.isdir():
        print("Error: there is no app called {0}.".format(app_path))
        sys.exit(1)
    # else
    for subdir in ('static', 'templates'):
        # Namespaced layout: <app>/static/<app> and <app>/templates/<app>.
        Path(appname, subdir, appname).mkdir(True)
    urls_path = Path(appname, 'urls.py')
    if not urls_path.isfile():
        urls_path.write_file(urls_py)
class cwd(object):
    """Context manager that chdirs into `cwd` (creating it with parents if
    needed) and restores the previous working directory on exit."""

    def __init__(self, cwd):
        # Remember where we were so __exit__ can go back.
        self.prev_cwd = FSPath.cwd()
        self.cwd = Path(cwd)
        if not self.cwd.exists():
            self.cwd.mkdir(parents=True)

    def __enter__(self):
        self.cwd.chdir()
        return self.cwd

    def __exit__(self, type_, value, traceback):
        # Always restore, even when the body raised.
        self.prev_cwd.chdir()
def segment(img, annotation, work_dir, stats): sides = ['left', 'top', 'right', 'bottom'] # Parse the given sentences for sentence in annotation: for word in sentence: for char in word: c = char.get('text') if c in '!-,.': continue # Skip stupid labels cdir = Path(work_dir, c) cdir.mkdir() f = Path(cdir, str(uuid.uuid1()) + '.ppm') rect = {side: int(char.get(side)) for side in sides} # Correct for swapped coordinates if rect['top'] > rect['bottom']: rect['top'], rect['bottom'] = rect['bottom'], rect['top'] if rect['left'] > rect['right']: rect['left'], rect['right'] = rect['right'], rect['left'] cropped_im = img[rect['top']:rect['bottom'], rect['left']:rect['right']] # Remove rows from the top if they're white while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \ and min(cropped_im[0,:]) == 255: cropped_im = cropped_im[1:,:] # Remove from the bottom while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \ and min(cropped_im[-1,:]) == 255: cropped_im = cropped_im[:-1,:] # Remove from the left while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \ and min(cropped_im[:,0]) == 255: cropped_im = cropped_im[:,1:] # Remove from the right while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \ and min(cropped_im[:,-1]) == 255: cropped_im = cropped_im[:,:-1] if cropped_im.shape[0] <= 5 or cropped_im.shape[1] <= 5: print "Discarding image" continue aspect_ratio = cropped_im.shape[0] / float(cropped_im.shape[1]) if not (1/3.0) <= aspect_ratio <= 5.5: print "Image with wrong aspect ratio:", aspect_ratio, c continue cv2.imwrite(f, cropped_im) # Add to statistics if c not in stats.keys(): stats[c] = {'width': [], 'height': []} stats[c]['width'].append(cropped_im.shape[1]) stats[c]['height'].append(cropped_im.shape[0])
def download_survey_responses(survey_name):
    """Download the survey data.

    Args:
        survey_name: 'sound_similarity_6' or 'sound_similarity_4'
    """
    qualtrics = Qualtrics(**get_creds())
    responses = qualtrics.get_survey_responses(survey_name)
    out_dir = Path(exp_dir, survey_name)
    if not out_dir.exists():
        out_dir.mkdir()
    # One csv per survey, named after the survey itself.
    responses.to_csv(Path(out_dir, survey_name + '.csv'), index=False)
def create():
    """Build the lexicon, write it to tmp/lexicon.csv and return it."""
    # create a clean temporary directory
    work_dir = Path("tmp")
    work_dir.mkdir()
    lexicon = create_lexicon()
    lexicon_path = Path(work_dir, "lexicon.csv")
    # FIX: close the csv file deterministically — the handle was leaked.
    with open(lexicon_path, "w") as fh:
        w = csv.writer(fh)
        for key, val in lexicon.items():
            w.writerow([key, val])
    # FIX: removed the duplicated, unreachable second `return lexicon`.
    return lexicon
def setup_report_dir(app_name, random_dir=True):
    """Create (and return) a report directory under the current directory.

    When `random_dir` is true the subdirectory name is today's date plus a
    random 6-character suffix; otherwise the app dir itself is used.
    """
    if random_dir:
        date_dir = time.strftime('%y-%m-%d')
        date_dir += "-"
        date_dir += ''.join(
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(6))
    else:
        date_dir = ""
    # NOTE(review): passing a list to Path relies on unipath flattening
    # list arguments into path components — confirm against unipath docs.
    report_dir = Path(Path.cwd(), [app_name, date_dir])
    report_dir.mkdir(parents=True)
    return report_dir
def gather(ctx):
    """Gather the experiment data and put it in the R pkg data-raw folder.

    Currently set to get the fourth run experiment data.
    """
    dest_dir = Path('propertyverificationdata', 'data-raw',
                    'question_first', 'fourth_run', 'data')
    if not dest_dir.exists():
        dest_dir.mkdir(parents=True)
    for data_file in Path('experiment/data').listdir('PV*csv'):
        run('cp {src} {dest}'.format(src=data_file,
                                     dest=Path(dest_dir, data_file.name)))
    # move the subj info sheet
    run('cp experiment/subj_info.csv {}'.format(dest_dir.parent))
def copy_sounds(ctx, force=False):
    """Copy sounds from acoustic-similarity to use in this experiment."""
    src_dir = Path('../acoustic-similarity/data/sounds')
    assert src_dir.isdir(), 'expecting sounds to be in {}'.format(src_dir)
    dst_dir = Path('stimuli/sounds')
    if not dst_dir.isdir():
        dst_dir.mkdir()
    trials = pandas.read_csv('stimuli/messages.csv')
    # Copy each unique seed's wav, skipping existing files unless forced.
    for seed_id in trials.seed_id.unique():
        wav_name = '{}.wav'.format(seed_id)
        destination = Path(dst_dir, wav_name)
        if force or not destination.exists():
            Path(src_dir, wav_name).copy(destination)
def main():
    """CLI entry point: export referer and search-term reports for a site.

    Usage: <prog> SITE DBURL OUTPUT_DIR
    """
    global conn
    opts, args = parser.parse_args()
    common.init_logging(opts.log_sql)
    if opts.debug:
        log.setLevel(logging.DEBUG)
    if len(args) != 3:
        parser.error("wrong number of command-line arguments")
    site, dburl, output_dir = args
    engine = sa.create_engine(dburl)
    log.debug("Starting")
    conn = engine.connect()
    output_dir = Path(output_dir)
    output_dir.mkdir()
    # NOTE(review): the boolean toggles two report variants — semantics
    # live in output_raw_referers; confirm before renaming.
    output_raw_referers(site, conn, output_dir, True)
    output_raw_referers(site, conn, output_dir, False)
    output_search_terms(site, conn, output_dir)
def main():
    """CLI entry point: rebuild the tmp working dir and create dataset(s).

    Usage: python <prog> KNMP|Stanford|both
    """
    if len(sys.argv) != 2 or sys.argv[1] not in ['KNMP', 'Stanford', 'both']:
        print("Usage: python %s <dataset>" % sys.argv[0])
        print("\tDataset should be either 'KNMP' or 'Stanford' or 'both'")
        sys.exit(1)
    # create a clean temporary directory
    # FIX: the original cleaned 'tmp' twice — os.makedirs followed by an
    # immediate rmtree+mkdir of the directory it had just created.
    if os.path.exists("tmp"):
        shutil.rmtree("tmp")
    work_dir = Path("tmp")
    work_dir.mkdir()
    if sys.argv[1] in ['KNMP', 'both']:
        create('KNMP', work_dir)
    if sys.argv[1] in ['Stanford', 'both']:
        create('Stanford', work_dir)
def convert_wav_to_mp3(src_dir=None, dst_dir=None):
    """Convert wav sounds to mp3."""
    if src_dir is None:
        src_dir = seeds_dir
    src_dir = Path(src_dir)
    assert src_dir.exists()
    # Default destination is the source directory itself.
    dst_dir = Path(src_dir if dst_dir is None else dst_dir)
    if not dst_dir.exists():
        dst_dir.mkdir(True)
    cmd = 'ffmpeg -i {} -codec:a libmp3lame -qscale:a 2 {}'
    for wav in src_dir.listdir('*.wav', names_only=True):
        mp3_name = Path(wav).stem + '.mp3'
        local_run(cmd.format(Path(src_dir, wav), Path(dst_dir, mp3_name)))
def env_create_package_files(workspace, packages, files):
    """Create fake sdist tarballs (or bare package dirs) for tests.

    For each package in `packages`: when `files` is non-empty, each entry
    is treated as a versioned dir name (e.g. 'pkg-1.0'); a PKG-INFO and
    egg-info are written, the dir is tarred to '<name>.tar.gz' next to it,
    and the source dir is removed. Otherwise an empty package dir is made.
    """
    for p in packages:
        if files:
            for f in files:
                f = Path(workspace, 'test', p, f)
                f.mkdir(parents=True)
                # Version string = the digit characters of the dir name
                # joined with dots (e.g. 'pkg-1.0' -> '1.0').
                version = '.'.join([x for x in f.name if x.isdigit()])
                i = env_create_package_pkginfo(p, version)
                Path(f, '%s.egg-info' % f.name).mkdir()
                Path(f, 'PKG-INFO').write_file(i)
                Path(f, '%s.egg-info' % f.name, 'PKG-INFO').write_file(i)
                # Tar from the parent dir so archive paths are relative.
                os.chdir(f.parent)
                tar = tarfile.open('%s.tar.gz' % f, 'w:gz')
                tar.add('%s' % f.name)
                tar.close()
                f.rmtree()
        else:
            Path(workspace, 'test', p).mkdir(parents=True)
def sort_movie(movie_name, movie_year, torrent_path):
    """Copy movie video file(s) from a finished torrent into MOVIE_DIR.

    The destination folder is '<name> (<year>)'. A single video becomes
    '<name><ext>'; multi-part movies become '<name> - CD<n><ext>'.
    """
    movie_dir = Path(MOVIE_DIR, movie_name + ' (' + movie_year + ')')
    movie_dir.mkdir(True)
    if torrent_path.isdir():
        files = list_files(torrent_path, VIDEO_FILES)
        logging.debug('List of files: {}'.format(files))
        # Remove videos that are not part of the movie
        videos = []
        for f in files:
            if any(n.lower() in f.stem.lower() for n in movie_name.split()):
                videos.append(f)
        if len(videos) < len(files) and len(videos) > 0:
            files = videos
        if len(files) == 0:
            logging.critical('No video files found in movie directory!')
            sys.exit(1)
        elif len(files) == 1:
            src_file = files[0]
            dst_file = Path(movie_dir, movie_name + files[0].ext)
            logging.info('Copying single file to destination: {}'.format(
                dst_file))
            copy_file(src_file, dst_file)
        else:
            # BUG FIX: the part counter was never incremented, so every
            # part was written as 'CD1'; enumerate numbers them correctly.
            for i, f in enumerate(files, start=1):
                dst_file = Path(movie_dir,
                                movie_name + ' - CD' + str(i) + f.ext)
                logging.info('Copying part {} from {} to {}'.format(
                    i, f, dst_file))
                copy_file(f, dst_file)
    else:
        if torrent_path.ext not in VIDEO_FILES:
            logging.warning('Unknown video file extention: {}'.format(
                torrent_path.ext))
        src_file = torrent_path
        dst_file = Path(movie_dir, movie_name + torrent_path.ext)
        logging.info('Copying single file to destination: {}'.format(
            dst_file))
        copy_file(src_file, dst_file)
def setup_test_sdist():
    """Build a shell script that installs the sdist into a fresh virtualenv
    and runs the test suite for each demo database, then execute it."""
    if len(env.demo_databases) == 0:
        return  # nothing to test against
    ve_path = Path(env.temp_dir, 'test_sdist')
    #~ if ve_path.exists(): ve_path.rmtree()
    #~ rmtree_after_confirm(ve_path)
    ve_path.mkdir()
    script = ve_path.child('tmp.sh')
    context = dict(name=env.SETUP_INFO['name'],
                   sdist_dir=env.sdist_dir,
                   ve_path=ve_path)
    #~ file(script,'w').write(TEST_SDIST_TEMPLATE % context)
    txt = TEST_SDIST_TEMPLATE % context
    # One test invocation per demo-database settings module.
    for db in env.demo_databases:
        txt += "django-admin.py test --settings=%s --traceback\n" % db
    script.write_file(txt)
    script.chmod(0o777)
    with lcd(ve_path):
        local(script)
def get_audio_features(mp3_name, interval_size = 5000.0):
    """Return (time_seconds, loudness) pairs sampled every
    `interval_size` milliseconds of the given mp3."""
    analyses_dir = Path(settings.ANALYSES_DIR)
    if not analyses_dir.exists():
        analyses_dir.mkdir()
    wav_path = _convert_mp3_to_wav(mp3_name)
    values = list()
    for piece in _break_wav_into_chunks(wav_path):
        segment = pydub.AudioSegment.from_file(piece, format = 'wav')
        moments = pydub.utils.make_chunks(segment, interval_size)
        del segment  # free the decoded audio as soon as possible
        peaks = [moment.dBFS for moment in moments]
        del moments
        # Shift dBFS (<= 0) into a non-negative loudness scale.
        values.extend([max(0, loudness + 120) for loudness in peaks])
    times = [i * (interval_size / 1000.0) for i in range(len(values))]
    return zip(times, values)
def create_folder():
    """Flask view: create a new folder inside the package repo and redirect
    to it (or to an existing match found elsewhere in the tree).

    Python 2 module (uses `except Exception, e` syntax).
    """
    base = Path(current_app.config.get('INUPYPI_REPO',
                                       Path('.', 'packages')))
    path = sanitize_path(request.form.get('path', ''))
    folder = sanitize_path(request.form.get('folder_name', ''))
    # unipath Path is a str subclass: Path(path, folder) is truthy only
    # when the joined path is non-empty.
    if Path(path, folder):
        temp_base = Path(base, path).absolute()
        create_path = Path(temp_base, folder)
        if create_path.exists():
            return redirect('%s' % Path(path, folder))
        # An equally-named folder elsewhere? Redirect to it instead.
        search = search_path(folder, temp_base)
        if search:
            base = Path(base).absolute()
            return redirect('%s' % sanitize_path(search.replace(base, '')))
        try:
            create_path.mkdir(parents=True)
        except Exception, e:
            status = 'Failed to create %s' % e
            abort(500, status)
def download_file(downloadable_url):
    """
    File is only downloaded if it doesn't exist in DOWNLOADS_DIR

    This function does not rename the file. It only downloads the file if
    the expected name is not present in the DOWNLOADS_DIR.

    Returns a django.core.files.File object that can be stored in a
    FileField.
    """
    download_dir = Path(settings.DOWNLOADS_DIR)
    if not download_dir.exists():
        download_dir.mkdir()
    # Cache key: the final path component of the URL.
    name_in_url = Path(downloadable_url).name
    expected_loc = Path(download_dir, name_in_url)
    # only download if necessary
    if not expected_loc.exists():
        # Stream in 1 KiB chunks so large files never sit fully in memory.
        response = requests.get(downloadable_url, stream = True)
        with open(expected_loc, 'wb') as expected_loc_handle:
            for chunk in response.iter_content(chunk_size = 1024):
                expected_loc_handle.write(chunk)
    return File(open(expected_loc, 'rb'))
def setup_dir(dir):
    """Create `dir` (with parents) plus 'prod' and 'sandbox' subdirs;
    return the report directory Path."""
    report_dir = Path(dir)
    report_dir.mkdir(parents=True)
    for name in ("prod", "sandbox"):
        Path(report_dir, name).mkdir()
    return report_dir
def append(self, key):
    """Add a public key for this list's user and commit it to the keydir.

    `key` may be raw key material or a path to a key file; it is
    normalised to bytes before hashing and storage.
    """
    key_path = Path(key)
    if key_path.isfile():
        with open(str(key_path)) as f:
            key = f.read()
    # FIX: normalise to bytes so md5() and the on-disk comparison work on
    # Python 3 — consistent with the sibling ListKeys.append variant.
    if not isinstance(key, bytes):
        key = key.encode('utf-8')
    if key in self:
        return
    # Keys live in keydir/<user>/<md5-of-key-body>/<user>.pub;
    # split()[1] is the base64 body of an OpenSSH public-key line.
    directory = Path(self.user.path, 'keydir', self.user.name,
                     hashlib.md5(key.strip().split()[1]).hexdigest())
    directory.mkdir(parents=True)
    key_file = Path(directory, "%s.pub" % self.user.name)
    if key_file.exists() and key_file.read_file() == key:
        return
    key_file.write_file(key, mode='wb')
    self.user.git.commit(['keydir'],
                         'Added new key for user %s' % self.user.name)
    super(ListKeys, self).append(key)
def create_photo_directory(headline, *datetime_obj):
    """
    Create a media directory named YEAR_MONTH_DAY_HEADLINE in
    portfolio/static/portfolio/media.

    input: headline string, optionally a datetime object (defaults to now)
    output: result of Path.mkdir() for the created directory
    """
    # TODO: This should create a directory in the project root
    # --> Make sure that the test is updated
    # BUG FIX: *datetime_obj packs the arguments into a tuple; the original
    # then called .strftime() on that tuple whenever a datetime was given.
    dt = datetime_obj[0] if datetime_obj else datetime.now()
    underscore_headline = headline.replace(' ', '_')
    post_dir = dt.strftime('%Y_%m_%d_{}'.format(underscore_headline))
    media_dir = Path('./portfolio/static/portfolio/media/{}'.format(post_dir))
    return media_dir.mkdir()
print('Usage: python {} <image> <words>'.format(sys.argv[0])) sys.exit(1) img_file = sys.argv[1] word_file = sys.argv[2] logger.info('Loading pretrained models...') with open('tmp/svm.pickle', 'r') as f: svm = pickle.load(f) logger.info('Loading image and words file') img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE) img = prep.preprocess(img) xml = ET.parse(word_file).getroot() out_dir.mkdir() for f in out_dir.walk('*'): f.remove() logger.info('Starting to create a split file') for sentence in xml: for word in sentence: text = word.get('text') print(text) #if '@' in text or len(text) < 6: if text != 'buton': continue # Skip short words # Get the word image rect = {side: int(word.get(side)) for side in sides} word_img = img[rect['top']:rect['bottom'], rect['left']:rect['right']]
class BatchWriter(object):
    """Writes datasets to disk as CIFAR-style pickled batch files under
    <data_path>/batches, plus a 'batches.meta' file with the data mean.

    Python 2 module (uses cPickle and integer division semantics).
    """

    def __init__(self, data_path, img_size=32, channels=3,
                 max_batch_size=10000):
        # All batches live in a 'batches' subdirectory of data_path.
        self.data_path = Path(data_path).child('batches')
        if not self.data_path.exists():
            self.data_path.mkdir(parents=True)
        self.img_size = img_size
        self.channels = channels
        self.max_batch_size = max_batch_size
        self.datasets = {}
        self.next_batch = 1       # next data_batch_<n> index to write
        self.train_range = None   # (first, last) train batch indices
        self.test_range = None    # (first, last) test batch indices

    def prepare_training(self, train_dset, test_dset):
        """Dump the train (optional) and test datasets as batches and write
        the shared meta batch (per-pixel mean + label names)."""
        if train_dset is not None:
            train_data = self.preprocess_data(train_dset.data)
            train_batch_size = self.calculate_batch_size(train_data.shape[0])
            self.train_range = self.dump_batches(train_data,
                                                 train_dset.output,
                                                 train_dset.filenames,
                                                 train_batch_size)
        test_data = self.preprocess_data(test_dset.data)
        test_batch_size = self.calculate_batch_size(test_data.shape[0])
        self.test_range = self.dump_batches(test_data, test_dset.output,
                                            test_dset.filenames,
                                            test_batch_size)
        # The meta batch uses train+test data when both are present,
        # otherwise just the test set.
        if train_dset is None:
            label_names = test_dset.labels.copy()
            data = test_data
            batch_size = test_batch_size
        else:
            label_names = train_dset.labels.copy()
            label_names.update(test_dset.labels)
            data = np.vstack((train_data, test_data))
            batch_size = train_batch_size
        self.dump_meta_batch(data, label_names, batch_size)

    def dump_meta_batch(self, data, label_names, batch_size):
        # Per-pixel mean over all samples, as a column vector.
        mean = data.transpose().mean(axis=1).reshape((-1, 1))
        data_path = self.data_path.child('batches.meta')
        self.write_cifar_meta_batch(data_path, mean, label_names, batch_size)

    def dump_batches(self, data, output, filenames, batch_size):
        """Write `data` in batch_size slices; return the (first, last)
        batch indices written."""
        start_batch = self.next_batch
        for i, mark in enumerate(range(0, data.shape[0], batch_size)):
            slice_ = slice(mark, mark + batch_size)
            self.write_cifar_batch(
                self.data_path.child('data_batch_' + str(self.next_batch)),
                data[slice_].transpose(), output[slice_], filenames[slice_])
            self.next_batch += 1
        return start_batch, self.next_batch - 1

    def write_cifar_batch(self, data_path, data, labels, filenames):
        # CIFAR batch layout: data is (pixels, samples) after transpose.
        data = {
            'batch_label': '',
            'labels': labels,
            'data': data,
            'filenames': filenames,
        }
        with open(data_path, 'wb') as f:
            cPickle.dump(data, f)

    def write_cifar_meta_batch(self, data_path, mean, label_names,
                               batch_size):
        data = {
            'data_mean': mean,
            'label_names': label_names,
            'num_cases_per_batch': batch_size,
            'num_vis': mean.shape[0]
        }
        with open(data_path, 'wb') as f:
            cPickle.dump(data, f)

    def preprocess_data(self, dset_data):
        """Resize flattened images to img_size when needed; returns a
        uint8 array of shape (N, img_size**2 * channels)."""
        # Original image side length, inferred from the flattened width.
        dset_size = int(np.sqrt(dset_data.shape[1] / self.channels))
        data = np.empty((dset_data.shape[0],
                         self.img_size**2 * self.channels), dtype=np.uint8)
        if self.img_size != dset_size:
            for i in range(data.shape[0]):
                image = ConvImage.from_array(dset_data[i], self.channels,
                                             dset_size)
                image.to_size(self.img_size)
                data[i] = image.to_array()
            return data
        return dset_data

    def get_data_options(self):
        # Command-line options for the training tool.
        return [
            '--data-path=%s' % self.data_path,
            '--train-range=%s-%s' % self.train_range,
            '--test-range=%s-%s' % self.test_range,
            '--img-size=%s' % self.img_size,
            '--data-provider=cifar'
        ]

    def get_data_options_test(self):
        return [
            '--data-dir=%s' % self.data_path,
            '--test-range=%s-%s' % self.test_range,
            '--is-dataset=1'
        ]

    def calculate_batch_size(self, data_size):
        """Largest batch size <= max_batch_size that splits `data_size`
        roughly evenly (Python 2 integer division)."""
        if data_size % self.max_batch_size == 0:
            return self.max_batch_size
        c = data_size / self.max_batch_size + 1
        return data_size / c
def test_mkdir_and_rmdir_with_parents(self):
    """mkdir(parents=True) creates intermediate dirs; rmdir(parents=True)
    removes the whole chain again."""
    abc = Path(self.d, "a", "b", "c")
    abc.mkdir(parents=True)
    assert abc.isdir()
    abc.rmdir(parents=True)
    assert not Path(self.d, "a").exists()
#! coding=utf-8
import datetime, smtplib, imp, os
from email.mime.text import MIMEText
import requests
from git import Repo
from git.errors import *
from unipath import Path
from jinja2 import Template

# Local settings are stored in $HOME/.ometrics/settings.py
# Find and load module there, creating files/dirs where appropriate
om = Path(os.environ['HOME']).absolute().child('.ometrics')
om.mkdir()
module_path = om.absolute()
if not om.child('settings.py').exists():
    # FIX: corrected the 'setitngs.py' typo in the user-facing message.
    print('Copying settings.py into %s' % str(om.child('settings.py')))
    Path(__file__).parent.child('settings.py').copy(om.child('settings.py'))
imp.load_source('settings', module_path.child('settings.py'))
from settings import *
from template import HTML_MAIL

# Reporting window: the last INTERVAL_DAYS days (from settings).
now = datetime.datetime.utcnow()
gap = datetime.timedelta(days=INTERVAL_DAYS)
begins = now - gap
class ProjectInstaller(Installer):
    """Installer that scaffolds a Django project from the skeleton git repo
    and delegates database / Django setup to sub-installers."""

    flavor = 'django_custom'
    git_repo = 'https://github.com/Libermentix/project_skeletton_directory.git'

    def __init__(self, project_dir, project_name, db_sudo=False,
                 db_sudo_user=None, *args, **kwargs):
        super(ProjectInstaller, self).__init__(
            project_dir, project_name, *args, **kwargs
        )
        self.var_dict = dict(
            project_dir=add_trailing_slash(project_dir),
            project_name=add_trailing_slash(project_name)
        )
        self._tmp_dir = None
        # Start from a clean slate if the target already exists.
        if self.install_path.exists():
            self.install_path.rmtree()
        self.db_installer = DatabaseInstaller(
            project_dir=project_dir, project_name=project_name,
            sudo=db_sudo, sudo_user=db_sudo_user
        )
        self.django_installer = DjangoInstaller(
            project_dir=project_dir, project_name=project_name
        )

    def run(self):
        # Full install pipeline: prepare -> create -> post-create.
        self.run_prepare_configuration()
        self.run_create_configuration()
        self.run_post_create_configuration()

    def run_prepare_configuration(self):
        # Clone the skeleton, copy it into place, install python deps.
        self.get_git_repo()
        self.install_skeletton()
        self.install_requirements()

    def run_post_create_configuration(self):
        """ Run the post_run_command_stack """
        self.db_installer()
        self.django_installer()
        self.move_to_venv(which_one='postactivate')
        self.move_to_venv(which_one='activate')
        self.finish_queued_commands()
        #run the post create configuration command for the children
        for item in self.db_installer.post_run_command_stack:
            # should be a callable or None
            if item:
                item()
        for item in self.django_installer.post_run_command_stack:
            # should be a callable or None
            if item:
                logger.info('%s: Executing a django_installer_script ...'
                            % item)
                item()

    @property
    def requirements_file(self):
        return Path(
            self.install_path, 'requirements', 'base.txt'
        ).absolute()

    @property
    def repo_dir(self):
        """Local checkout dir of the skeleton repo inside the tmp dir."""
        #get last
        directory = self.git_repo.split('/')[-1:][0]
        #remove .git
        directory = directory.split('.')[0]
        return Path(self._tmp_dir, directory)

    def create_tmp_dir(self):
        # TODO:
        # Account for existing project paths, here it should ask to remove
        # or abort.
        self._tmp_dir = Path(self.install_path, 'tmp')
        self._tmp_dir.mkdir()
        self._tmp_dir.chdir()

    def delete_tmp_dir(self):
        # Leave the tmp dir before deleting it (cwd must stay valid).
        self.project_dir.chdir()
        self._tmp_dir.rmtree()

    def get_git_repo(self):
        self.create_tmp_dir()
        logger.info('Cloning repository ...')
        if self.repo_dir.exists():
            logger.info('Repo dir exists removing it...')
            self.repo_dir.rmtree()
        git.Git().clone(self.git_repo)
        logger.info('..done')

    def install_skeletton(self):
        logger.info('Installing %s' % self.flavor)
        source = Path(self.repo_dir, self.flavor)
        #move all items in the directory into the install_path
        for item in source.listdir():
            item.move(self.install_path)
        self.delete_tmp_dir()
        logger.info('...done')

    def install_virtualenv(self):
        """
        Calls a script that creates the virtual environment and installs
        its dependencies, currently only sports python2.7 support.
        """
        exec_path = Path(Path(__file__).parent, 'bash', 'installer.sh')
        command = '%s %s %s %s' % (exec_path, self.install_path,
                                   self.project_name,
                                   self.requirements_file)
        logger.info('Installing virtualenv... (calling %s)' % command)
        self.run_command(command)

    def install_requirements(self):
        if not self.is_envwrapper:
            self.install_virtualenv()
        else:
            # we can assume that we are in the virtualenv now, and
            # mkproject was called
            command = 'pip install -r %s' % self.requirements_file
            self.run_command(command)
def set_data_dir(self):
    """Create the INPUT/OUTPUT/TMP directory tree for this app/run/host
    and record every leaf data path in self.df_datafile.

    Layout: <dataPath>/{INPUT,OUTPUT,TMP}/<APP_ID>/<AIMS_ID>/<HOST_ID>/
            {CSV,JSON,IMAGE,VIDEO}, plus TMP IMAGE/{CROP,ITENSITY} and
            OUTPUT VIDEO/ITENSITY ('ITENSITY' spelling kept as-is: it is
            an on-disk path name other code may rely on).
    """
    # 201811: initial setup (original Korean note).
    # FIX: the original repeated the same mkdir boilerplate ~30 times;
    # the helpers below build the identical tree.
    def _subdir(parent, name):
        # Create and return <parent>/<name>, mirroring the original
        # rstrip('/') + '/' + name concatenation.
        child = Path(str(parent).rstrip("/") + "/" + name)
        child.mkdir()
        return child

    def _leaf_dir(base):
        # <base>/<APP_ID>/<AIMS_ID>/<HOST_ID>, each level created.
        d = _subdir(base, self.app_id.upper())
        d = _subdir(d, self.aimsId.upper())
        return _subdir(d, self.hostId.upper())

    # DATA ROOT (Level 1)
    data_root = Path(self.dataPath)
    data_root.mkdir()
    # DATA IN/OUT/TMP (Level 2)
    data_input_path = _subdir(data_root, "INPUT")
    data_output_path = _subdir(data_root, "OUTPUT")
    data_tmp_path = _subdir(data_root, "TMP")

    kinds = ["CSV", "JSON", "IMAGE", "VIDEO"]

    # INPUT branch
    in_leaf = _leaf_dir(data_input_path)
    in_dirs = {k: _subdir(in_leaf, k) for k in kinds}

    # TMP branch (+ image CROP / ITENSITY subdirs)
    tmp_leaf = _leaf_dir(data_tmp_path)
    tmp_dirs = {k: _subdir(tmp_leaf, k) for k in kinds}
    tmp_crop = _subdir(tmp_dirs["IMAGE"], "CROP")
    tmp_inten = _subdir(tmp_dirs["IMAGE"], "ITENSITY")

    # OUTPUT branch (+ video ITENSITY subdir)
    out_leaf = _leaf_dir(data_output_path)
    out_dirs = {k: _subdir(out_leaf, k) for k in kinds}
    out_video_inten = _subdir(out_dirs["VIDEO"], "ITENSITY")

    # Same names/order as the original DataFrame.
    names = ["INPUT_CSV_PATH", "INPUT_JSON_PATH", "INPUT_IMAGE_PATH",
             "INPUT_VIDEO_PATH",
             "OUTPUT_CSV_PATH", "OUTPUT_JSON_PATH", "OUTPUT_IMAGE_PATH",
             "OUTPUT_VIDEO_PATH",
             "TMP_CSV_PATH", "TMP_JSON_PATH", "TMP_IMAGE_PATH",
             "TMP_VIDEO_PATH",
             "TMP_IMAGE_CROP_PATH", "TMP_IMAGE_INTEN_PATH",
             "OUTPUT_VIDEO_INTEN_PATH"]
    paths = [in_dirs["CSV"], in_dirs["JSON"], in_dirs["IMAGE"],
             in_dirs["VIDEO"],
             out_dirs["CSV"], out_dirs["JSON"], out_dirs["IMAGE"],
             out_dirs["VIDEO"],
             tmp_dirs["CSV"], tmp_dirs["JSON"], tmp_dirs["IMAGE"],
             tmp_dirs["VIDEO"],
             tmp_crop, tmp_inten, out_video_inten]
    data = {"DATA_NAME": names,
            "DATA_PATH": paths,
            "USE_YN": ["Y"] * len(names)}
    self.df_datafile = pd.DataFrame(data,
                                    columns=["DATA_NAME", "DATA_PATH",
                                             "USE_YN"])
    # return self.df_datafile  (result kept on the instance, as before)
from unipath import Path # proj_root/py_pkg/settings.py PROJ_ROOT = Path(__file__).ancestor(2).absolute() DATA_DIR = Path(PROJ_ROOT, 'data') if not DATA_DIR.isdir(): DATA_DIR.mkdir() SQLITE_PATH = Path(DATA_DIR, 'article_qualities.sqlite')
class Installer(object):
    """Base installer that renders virtualenv(wrapper) hook files.

    Renders ``postactivate``/``postdeactivate`` shell templates (via jinja2)
    into ``install_path`` and can move them into the virtualenv's ``bin``
    folder.  Subclasses must implement :meth:`run_prepare_configuration`
    and are named so that ``<classname lowercased>.<hook>.sh`` is the
    template filename.
    """

    # Class-level defaults; the mutable ones are re-bound per instance in
    # __init__ (a class-level dict/list would be shared by every instance).
    install_path = None
    postactivate = None
    postdeactivate = None
    project_name = None
    project_dir = None
    var_dict = {}
    post_run_command_stack = []

    def __init__(self, project_dir, project_name, envwrapper=False,
                 *args, **kwargs):
        """Create the install path and chdir into it.

        :param project_dir: parent directory of the project
        :param project_name: name of the project / virtualenv
        :param envwrapper: whether virtualenvwrapper is in use
        :param kwargs: any attribute overrides (applied via setattr)
        """
        self.project_dir = Path(project_dir).absolute()
        self.project_name = project_name
        self.is_envwrapper = envwrapper
        # Bugfix: give every instance its OWN containers.  Previously the
        # class-level dict/list were mutated by all instances alike.
        # Bound before the kwargs loop so callers can still override them.
        self.var_dict = {}
        self.post_run_command_stack = []
        # make all attributes overridable so that external applications
        # can make use of the pattern and reset the variable names
        for k, v in six.iteritems(kwargs):
            setattr(self, k, v)
        self.install_path = Path(self.project_dir, project_name)
        self.install_path.mkdir()
        self.install_path.chdir()
        # Lazy caches; False means "not computed yet".
        self._environment_cache = False
        self._template_dir_cache = False
        self._template_cache = False

    @property
    def venv_folder(self):
        """
        extracts the venv folder from the environment variables
        ($WORKON_HOME, to be precise) and combines it with the project name.

        Returns None when $WORKON_HOME is not set.
        """
        path = os.environ.copy().get('WORKON_HOME')
        if path:
            return Path(path)
        else:
            return None

    @property
    def template_env(self):
        """
        provides the (cached) jinja2 template environment
        """
        if not getattr(self, '_environment_cache', False):
            self._environment_cache = Environment(
                loader=FileSystemLoader(self.get_template_dir())
            )
        return self._environment_cache

    def run_command(self, command, blocking=False):
        """Execute ``command``; when ``blocking``, wait for it to finish."""
        command = Command(command)
        command()
        if blocking:
            logger.debug('Waiting for command to finish...')
            command.wait()
        return True

    def finish_queued_commands(self):
        # Delegates to the module-level helper of the same name.
        finish_queued_commands()

    def get_installer_name(self):
        """Return the lower-cased class name (used as template prefix)."""
        return self.__class__.__name__.lower()

    def get_template_dir(self):
        """Return (and cache) the directory containing the shell templates."""
        # Bugfix: this previously tested `_template_cache` (the rendered
        # template cache) instead of `_template_dir_cache`, so the cached
        # path could be skipped or recomputed depending on call order.
        if not getattr(self, '_template_dir_cache', False):
            self._template_dir_cache = Path(Path(__file__).parent,
                                            'templates')
        return self._template_dir_cache

    def get_template(self, which_one):
        """
        provides a wrapper around jinja2 get_template. Caches the result.

        returns a cached template
        """
        if not getattr(self, '_template_cache', False):
            self._template_cache = dict()
        if not self._template_cache.get(which_one, False):
            template_file = '%s.%s.sh' % (self.get_installer_name(),
                                          which_one)
            self._template_cache[which_one] = \
                self.template_env.get_template(template_file)
        return self._template_cache[which_one]

    def run_prepare_configuration(self):
        raise NotImplementedError('Must be implemented in subclass')

    def render_config_for_file_template(self, which_one):
        """Render the template for ``which_one`` and store the result as an
        attribute of the same name (e.g. self.postactivate)."""
        logger.info('preparing config variables for %s ...' % which_one)
        template = self.get_template(which_one=which_one)
        contents = template.render(**self.var_dict)
        setattr(self, which_one, contents)
        logger.info('...done')

    def create_file(self, which_one):
        """Render ``which_one`` and append it to a file in install_path."""
        self.render_config_for_file_template(which_one=which_one)
        logger.info('Creating config files in parent dir: %s'
                    % self.install_path)
        # gets self.postdeactivate if which_one == 'postdeactivate'
        contents = getattr(self, which_one)
        logger.info('%s: Writing contents to file ...' % which_one)
        p = Path(self.install_path, which_one)
        # write configuration and append it to the file
        p.write_file(contents, 'a+')
        logger.info('...done')

    def move_to_venv(self, which_one):
        """
        Moves the created config_files into the bin folder to be executed.
        Does this by first pasting all the contents of the temporary file
        into the new or existing target file and then deleting the temp
        file.
        """
        target = Path(self.venv_folder, self.project_name, 'bin', which_one)
        source = Path(self.install_path, which_one)
        logger.info('target: %s, move_orig: %s' % (target, source))
        if source.exists():
            logger.info('Moving %s into place ...' % which_one)
            content = source.read_file()
            # make sure the directory exists
            if not target.parent.exists():
                target.parent.mkdir(parents=True)
            target.write_file(content, 'w+')
            source.remove()
            logger.info('...done')

    def run_create_configuration(self):
        """Create both hook files."""
        self.create_file(which_one='postactivate')
        self.create_file(which_one='postdeactivate')

    def run_post_create_configuration(self):
        # Hook for subclasses; intentionally a no-op here.
        pass

    def run(self):
        """Template method: prepare, create, post-process."""
        self.run_prepare_configuration()
        self.run_create_configuration()
        self.run_post_create_configuration()

    def __call__(self, *args, **kwargs):
        self.run()
class Base(object):
    """
    set and make directory

    Parameters
    ----------
    home : str
        set a directory as home
    """

    def __init__(self, home='.'):
        """
        set home directory

        Parameters
        ----------
        home : str
            set a directory as home
        """
        self._home = Path(home).absolute()

    def __str__(self):
        return self.home

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.home)

    @property
    def home(self):
        # Expose home as a plain string.
        return self._home.__str__()

    @home.setter
    def home(self, path):
        self._home = Path(path).absolute()

    def make_home(self, force=False):
        """
        make home directory

        Parameters
        ----------
        force : bool
            if True, if home exists and is a dir that contains contents,
            then delete contents in it; if it exists and is not a dir,
            remove it and make a dir
        """
        self.__mkdir(force)

    def __mkdir(self, force=False):
        # Branches are mutually exclusive.  The original fell through from
        # the "exists but is a file" case into the "force" case, creating
        # the directory, removing it again, and re-creating it.
        if not self._home.exists():
            self._home.mkdir(parents=True)
        elif not self._home.isdir():
            if not force:
                raise Exception('%s exists but is not a dir' % self.home)
            self._home.remove()
            self._home.mkdir()
        elif force:
            # Existing directory: wipe its contents by recreating it.
            self._home.rmtree()
            self._home.mkdir()
        # else: existing directory, not forced -> nothing to do

    def __rmdir(self, force=False):
        # Bugfix: the original fell through from the "exists but is a file"
        # force path (remove()) into rmtree() on the already-deleted path.
        if not self._home.exists():
            return
        if not self._home.isdir():
            if not force:
                raise Exception('%s exists but is not a dir' % self.home)
            self._home.remove()
        elif force:
            self._home.rmtree()
        else:
            # Non-forced removal of a directory: only works when empty.
            self._home.rmdir()

    def rm_home(self, force=False):
        """
        remove home directory

        Parameters
        ----------
        force : bool
            if True, if home exists and is a dir that contains contents,
            then delete it and its contents; if it exists and is not a
            dir, remove it
        """
        self.__rmdir(force)
def _tidy_survey(survey_name):
    """Tidy one survey's Qualtrics export into long-format csv outputs.

    Reads ``<survey_name>/<survey_name>.csv`` and ``loop_merge.csv`` from
    the module-level ``exp_dir`` and writes two files under
    ``out_dir/<survey_name>``: ``bad_subjs.csv`` (workers who failed the
    catch trial, for payment denial) and ``odd_one_out.csv`` (tidy
    odd-one-out responses joined with subject-level quality flags).
    """
    # Inputs
    survey_csv = Path(exp_dir, survey_name, survey_name + '.csv')
    survey = pd.read_csv(survey_csv, skiprows=[0, ])
    loop_merge_csv = Path(exp_dir, survey_name, 'loop_merge.csv')
    loop_merge = pd.read_csv(loop_merge_csv)

    # Outputs
    survey_dir = Path(out_dir, survey_name)
    if not survey_dir.exists():
        survey_dir.mkdir()
    bad_subjs_csv = Path(survey_dir, 'bad_subjs.csv')
    odd_one_out_csv = Path(survey_dir, 'odd_one_out.csv')

    # Begin tidying
    id_col = 'workerId'

    # label the workers who passed the catch trial
    # Bugfix: fillna(..., inplace=True) on survey.loc[:, col] mutates a
    # (possibly temporary) slice and may never write back to `survey`
    # (pandas chained-assignment pitfall); assign the column explicitly.
    survey['describe_catch'] = survey['describe_catch'].fillna('')
    survey['failed_catch_trial'] = ~survey.describe_catch.str.contains(
        'piano', case=False)

    # export the subjects to deny payment
    # .loc replaces the removed .ix indexer (identical for boolean masks)
    survey.loc[survey.failed_catch_trial].to_csv(bad_subjs_csv, index=False)

    # label the workers who reported problems with audio
    is_problem_col = survey.columns.str.contains(r'problems\ ')
    problem_cols = survey.columns[is_problem_col].tolist()
    problem = pd.melt(survey, id_col, problem_cols, var_name='qualtrics',
                      value_name='problem_with_audio')
    # qualtrics column labels end in "(<row>)": pull out the loop_merge row
    problem['loop_merge_row'] = problem.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)
    problem['problem_with_audio'] = problem.problem_with_audio.fillna(
        False).astype(bool)
    problem.drop('qualtrics', axis=1, inplace=True)

    # combine filters
    subjs = pd.merge(survey[[id_col, 'failed_catch_trial']], problem)
    subjs['failed_catch_trial'] = subjs.failed_catch_trial.astype(int)
    subjs['problem_with_audio'] = subjs.problem_with_audio.astype(int)

    # tidy the survey data
    is_odd_col = survey.columns.str.contains(r'odd_one_out\ ')
    odd_cols = survey.columns[is_odd_col].tolist()
    odd = pd.melt(survey, id_col, odd_cols, var_name='qualtrics',
                  value_name='odd_one_out')
    odd['loop_merge_row'] = odd.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)

    file_map = pd.melt(loop_merge.drop('loop_merge_row', axis=1), 'category',
                       var_name='odd_one_out', value_name='url')
    file_map['odd_one_out'] = file_map.odd_one_out.astype(int)
    file_map['filename'] = file_map.url.apply(lambda x: Path(x).name)
    file_map.drop('url', axis=1, inplace=True)

    odd = odd.merge(loop_merge[['category', 'loop_merge_row']])
    odd = odd.merge(file_map)
    odd = odd.merge(subjs)
    # DataFrame.sort was removed from pandas; sort_values is the replacement
    odd.sort_values(['workerId', 'category'], inplace=True)
    odd = odd[[
        'workerId', 'failed_catch_trial', 'problem_with_audio', 'category',
        'filename'
    ]]
    odd.to_csv(odd_one_out_csv, index=False)
import re import urllib import logging import httplib2 import datetime from unipath import Path from lxml.html import document_fromstring from dateutil.parser import parse as date_parse from ..config import Config CACHE = Path("~/newsclips2/").expand() CACHE.mkdir(parents=True) HTTP = httplib2.Http(CACHE) class Article(object): def __init__(self, line): self.log = logging.getLogger('newsclips.article') self.url, self.notes = re.search(r'^(https?://[^ ]+) ?(.*)$', line).groups() self.tree = self.get_tree() self.config = Config() def get_tree(self): """ Return the DOM for the article content. Note this actually returns the XPATH method on the tree, so you can do: a.tree(<xpath>) directly. """ quoted_url = urllib.quote(self.url, safe='')
def _tidy_survey(survey_name):
    """Tidy one survey's Qualtrics export into long-format csv outputs.

    Reads ``<survey_name>/<survey_name>.csv`` and ``loop_merge.csv`` from
    the module-level ``exp_dir`` and writes two files under
    ``out_dir/<survey_name>``: ``bad_subjs.csv`` (workers who failed the
    catch trial, for payment denial) and ``odd_one_out.csv`` (tidy
    odd-one-out responses joined with subject-level quality flags).
    """
    # Inputs
    survey_csv = Path(exp_dir, survey_name, survey_name + '.csv')
    survey = pd.read_csv(survey_csv, skiprows=[0, ])
    loop_merge_csv = Path(exp_dir, survey_name, 'loop_merge.csv')
    loop_merge = pd.read_csv(loop_merge_csv)

    # Outputs
    survey_dir = Path(out_dir, survey_name)
    if not survey_dir.exists():
        survey_dir.mkdir()
    bad_subjs_csv = Path(survey_dir, 'bad_subjs.csv')
    odd_one_out_csv = Path(survey_dir, 'odd_one_out.csv')

    # Begin tidying
    id_col = 'workerId'

    # label the workers who passed the catch trial
    # Bugfix: fillna(..., inplace=True) on survey.loc[:, col] mutates a
    # (possibly temporary) slice and may never write back to `survey`
    # (pandas chained-assignment pitfall); assign the column explicitly.
    survey['describe_catch'] = survey['describe_catch'].fillna('')
    survey['failed_catch_trial'] = ~survey.describe_catch.str.contains(
        'piano', case=False
    )

    # export the subjects to deny payment
    # .loc replaces the removed .ix indexer (identical for boolean masks)
    survey.loc[survey.failed_catch_trial].to_csv(bad_subjs_csv, index=False)

    # label the workers who reported problems with audio
    is_problem_col = survey.columns.str.contains(r'problems\ ')
    problem_cols = survey.columns[is_problem_col].tolist()
    problem = pd.melt(survey, id_col, problem_cols, var_name='qualtrics',
                      value_name='problem_with_audio')
    # qualtrics column labels end in "(<row>)": pull out the loop_merge row
    problem['loop_merge_row'] = problem.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)
    problem['problem_with_audio'] = problem.problem_with_audio.fillna(
        False).astype(bool)
    problem.drop('qualtrics', axis=1, inplace=True)

    # combine filters
    subjs = pd.merge(survey[[id_col, 'failed_catch_trial']], problem)
    subjs['failed_catch_trial'] = subjs.failed_catch_trial.astype(int)
    subjs['problem_with_audio'] = subjs.problem_with_audio.astype(int)

    # tidy the survey data
    is_odd_col = survey.columns.str.contains(r'odd_one_out\ ')
    odd_cols = survey.columns[is_odd_col].tolist()
    odd = pd.melt(survey, id_col, odd_cols, var_name='qualtrics',
                  value_name='odd_one_out')
    odd['loop_merge_row'] = odd.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)

    file_map = pd.melt(loop_merge.drop('loop_merge_row', axis=1), 'category',
                       var_name='odd_one_out', value_name='url')
    file_map['odd_one_out'] = file_map.odd_one_out.astype(int)
    file_map['filename'] = file_map.url.apply(lambda x: Path(x).name)
    file_map.drop('url', axis=1, inplace=True)

    odd = odd.merge(loop_merge[['category', 'loop_merge_row']])
    odd = odd.merge(file_map)
    odd = odd.merge(subjs)
    # DataFrame.sort was removed from pandas; sort_values is the replacement
    odd.sort_values(['workerId', 'category'], inplace=True)
    odd = odd[['workerId', 'failed_catch_trial', 'problem_with_audio',
               'category', 'filename']]
    odd.to_csv(odd_one_out_csv, index=False)