def save_html(data, filename, key):
    path = join(settings.LOG_DIR, key, filename + '.html')
    try:
        write(data, path)
    except IOError:
        mkdir_p(dirname(path))
        write(data, path)
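# Every snippet in this collection assumes an importable mkdir_p helper.
# The implementation below is a minimal, hedged sketch rather than any of the
# original projects' actual utilities: it mirrors the usual `mkdir -p` idiom of
# creating all missing parent directories and tolerating an already-existing
# target. The optional `mode` argument is an assumption, included only because
# some callers here pass one (e.g. mkdir_p(PATH_CACHE, 0o700)).
import errno
import os


def mkdir_p(path, mode=0o777):
    """Create `path` and any missing parents; do nothing if it already exists."""
    try:
        os.makedirs(path, mode)
    except OSError as exc:
        # Re-raise anything other than "directory already exists".
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise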
def make_pull_plot_gen(category, misIDRatios, catRatios):
    pull_plot = TH1D(category, category, 6, 0, 6)
    others_plot = TH1D(category+"others", category+"others", 6, 0, 6)
    true_plot = TH1D(category+"true", category+"true", 6, 0, 6)
    bin_names = get_all_containers(category)
    for b in range(1, len(bin_names)+1):
        pull_plot.GetXaxis().SetBinLabel(b, bin_names[b-1])
        #pull_plot.SetAxisRange(-0.006, 0.006,"Y")
        #others_plot.SetAxisRange(-0.006, 0.006,"Y")
        others_plot.GetXaxis().SetBinLabel(b, bin_names[b-1])
        true_plot.GetXaxis().SetBinLabel(b, bin_names[b-1])
        (value, err, err_plus) = catRatios[bin_names[b-1]]
        pull_plot.SetBinContent(b, value)
        pull_plot.SetBinError(b, err)
        other = get_other_component(category, bin_names[b-1])
        (valueO, errO, errO_plus) = misIDRatios[other]
        others_plot.SetBinContent(b, valueO)
        others_plot.SetBinError(b, errO)
        true_plot.SetBinContent(b, misIDRatios[category][0])
        #print bin_names[b-1], value, valueO
    pull_plot.Add(others_plot, -1)
    c = TCanvas("Plot", "Plot", 1920, 1080)
    ROOT.gStyle.SetOptStat(0)
    true_plot.SetLineColor(ROOT.kRed)
    true_plot.SetLineWidth(3)
    true_plot.GetYaxis().SetRangeUser(-0.006, 0.006)
    true_plot.Draw()
    pull_plot.SetLineWidth(3)
    pull_plot.Draw("SAME")
    mydir = "pull_plots_gen/"
    mkdir_p(mydir)
    c.SaveAs("%s/%s_pulls.pdf" % (mydir, category))
    c.SaveAs("%s/%s_pulls.png" % (mydir, category))
def build_gdb13_data():
    atom_idxs = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4}
    base_path = os.path.join(DATA_BASE_DIR, "gdb13")
    mkdir_p(base_path)

    energies = []
    atom_counts = []
    for name in sorted(os.listdir(os.path.join(base_path, "xyz"))):
        xyz_path = os.path.join(base_path, "xyz", name)
        out_path = xyz_path.replace("xyz", "out")

        natoms = 0
        energy = None
        counts = [0 for _ in atom_idxs]
        with open(xyz_path, 'r') as xyz_f, open(out_path, 'w') as out_f:
            for i, line in enumerate(xyz_f):
                line = line.strip()
                if not i:
                    natoms = int(line)
                elif i == 1:
                    energy = float(line.split()[-3])
                elif i - 2 < natoms:
                    line = line.replace("*^", "e")
                    ele, x, y, z, _ = line.split()
                    counts[atom_idxs[ele]] += 1
                    out_f.write("%s %.8f %.8f %.8f\n" % (ele, float(x), float(y), float(z)))
        energies.append(energy)
        atom_counts.append(counts)

    atom_counts = numpy.matrix(atom_counts)
    atomization = calculate_atomization_energies(atom_counts, numpy.matrix(energies).T)
    atomization *= HARTREE_TO_KCAL
    numpy.savetxt(os.path.join(base_path, "energies.txt"), atomization)
    numpy.savetxt(os.path.join(base_path, "heavy_counts.txt"), atom_counts.sum(1))
def saveTime(time):
    dir_ = './.Scores/'
    utils.mkdir_p(dir_)
    f = open(dir_ + '.Scores.txt', 'a')
    f.write(str(time) + '\n')
    f.close()
def sweep():
    to_check = []

    bioguide = utils.flags().get('bioguide', None)
    if bioguide:
        possibles = [bioguide]
    else:
        possibles = current_bioguide.keys()

    for bioguide in possibles:
        if media_bioguide.get(bioguide, None) is None:
            to_check.append(bioguide)
        elif media_bioguide[bioguide]["social"].get(service, None) is None:
            to_check.append(bioguide)
        else:
            pass

    utils.mkdir_p("cache/social_media")
    writer = csv.writer(open("cache/social_media/%s_candidates.csv" % service, 'w'))
    writer.writerow(["bioguide", "official_full", "website", "service", "candidate", "candidate_url"])

    if len(to_check) > 0:
        email_body = "Social media leads found:\n\n"
        for bioguide in to_check:
            candidate = candidate_for(bioguide)
            if candidate:
                url = current_bioguide[bioguide]["terms"][-1].get("url", None)
                candidate_url = "https://%s.com/%s" % (service, candidate)
                row = [bioguide, current_bioguide[bioguide]['name']['official_full'].encode('utf-8'), url, service, candidate, candidate_url]
                writer.writerow(row)
                print "\tWrote: %s" % candidate
                email_body += ("%s\n" % row)

        if email_enabled:
            utils.send_email(email_body)
def main():
    utils.Initialize()
    e_name_list = gflags.FLAGS.extractors
    new_experiment = experiments.StartNewExperiment(e_name_list)
    experiment_id = new_experiment.GetID()
    utils.mkdir_p(gflags.FLAGS.reports_dir)
    utils.mkdir_p(gflags.FLAGS.models_dir)
    report_loc = path.join(gflags.FLAGS.reports_dir, "%.3d.html" % experiment_id)
    model_loc = path.join(gflags.FLAGS.models_dir, "%.3d.model" % experiment_id)
    print "Experiment ID: %d. Detailed report at %s. Model at %s\n" % (
        experiment_id,
        report_loc,
        model_loc,
    )
    cv_data = LoadCVData()
    hd_data = LoadHDData()
    new_experiment.RunCrossValidation(cv_data)
    model = models.BuildModel(e_name_list, cv_data)
    model.Save(model_loc)
    hd_result = model.EvaluateOn(hd_data)
    new_experiment.RecordHeldoutDataEval(hd_result)
    new_experiment.Save()
    new_experiment.PrintSummary()
    new_experiment.ExportReport(report_loc)
def process_clip(clip_name, paths, img_type, negative_images, overwrite=True):
    # overwrite: overwrite the training of the FFLD model.
    print(clip_name)
    frames_path = paths['clips'] + frames + clip_name + sep
    if not check_path_and_landmarks(frames_path, clip_name, paths['in_bb'] + clip_name + sep):  # check that paths, landmarks exist
        return
    list_frames = sorted(os.listdir(frames_path))
    save_model = paths['out_model'] + clip_name + '.model'
    if (not os.path.exists(save_model)) or overwrite:  # build the detector
        training_pos = load_images(list_frames, frames_path, paths['in_bb'], clip_name, max_images=400)
        if len(training_pos) == 0:
            print('No positives found for the clip {}, skipping it.'.format(clip_name))
            return
        ps_model = train_ffld2_detector(training_pos, negative_images, n_components=1, n_relabel=6)
        ps_model.save(save_model)
    else:
        print('The model {} already exists and was loaded from disk.'.format(save_model))
        ps_model = load_model(save_model)
    global detector
    detector = FFLD2Detector(ps_model)

    p_det_bb = mkdir_p(paths['out_bb'] + clip_name + sep)
    p_det_landm = mkdir_p(paths['out_lns'] + clip_name + sep)
    clip = Clip(clip_name, paths['clips'], frames, write_ln=[p_det_bb, p_det_landm])
    # TODO: Try parallel model
    [predict_in_frame(frame_name, clip, img_type) for frame_name in list_frames]
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_list', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--dump_prefix', required=True)
    parser.add_argument('--output_dir', required=True)
    args = parser.parse_args()

    conf_mod = imp.load_source('config', args.config)
    config = conf_mod.get()
    model = config['model']
    utils.load_model(model, args.dump_prefix)

    X, _ = conf_mod.get_data(args.image_list)
    utils.mkdir_p(args.output_dir)
    image_list = utils.read_image_list(args.image_list)

    logger.info('compiling model ...')
    model.compile(loss='mean_squared_error', optimizer=Adam())

    for x, (input_path, _) in ProgressBar()(zip(X, image_list)):
        y = model.predict(np.array([x], dtype='float32'), batch_size=1, verbose=False)
        img = np.round(y.reshape(y.shape[2:]) * 255.0).astype('uint8')
        # FIXME: we assume that basenames of images are distinct
        fname = os.path.basename(input_path)
        output_path = os.path.join(args.output_dir, fname)
        cv2.imwrite(output_path, img)
def test_mkdir_p(self):
    d = mkdtemp()
    path = os.path.join(d, 'a/b/c')
    mkdir_p(path)
    self.assertTrue(os.path.isdir(path))
    mkdir_p(path)  # Already exists test
    rmtree(d)
def create_test_case(self):
    """
    Create a test case.
    """
    testyml_template = default_testyml_template.replace(
        "%role", self.normalized_role)
    testyml_template = testyml_template.replace(
        "%year", str(date.today().year))
    testyml_template = testyml_template.replace(
        "%author", self.config["author_name"])
    testyml_template = testyml_template.replace(
        "%email", self.config["author_email"])

    utils.mkdir_p(os.path.join(self.output_path, "tests", "inventory",
                               "group_vars"))
    utils.mkdir_p(os.path.join(self.output_path, "tests", "inventory",
                               "host_vars"))

    hosts = "placeholder_fqdn\n"
    utils.string_to_file(os.path.join(self.output_path, "tests", "inventory",
                                      "hosts"), hosts)

    test_file = os.path.join(self.output_path, "tests", "test")
    utils.string_to_file(test_file, testyml_template)
    os.chmod(test_file, 0755)
def user_login(self, user):
    """
    Called immediately after a user authenticates successfully.  Saves
    session information in the user's directory.  Expects *user* to be a dict
    containing a 'upn' value representing the username or userPrincipalName.
    e.g. '[email protected]' or just 'someuser'.  Any additional values will be
    attached to the user object/cookie.
    """
    logging.debug("user_login(%s)" % user["upn"])
    user.update(additional_attributes(user))
    # Make a directory to store this user's settings/files/logs/etc
    user_dir = os.path.join(self.settings["user_dir"], user["upn"])
    if not os.path.exists(user_dir):
        logging.info(_("Creating user directory: %s" % user_dir))
        mkdir_p(user_dir)
        os.chmod(user_dir, 0o700)
    session_file = os.path.join(user_dir, "session")
    session_file_exists = os.path.exists(session_file)
    if session_file_exists:
        session_data = open(session_file).read()
        try:
            session_info = tornado.escape.json_decode(session_data)
        except ValueError:  # Something wrong with the file
            session_file_exists = False  # Overwrite it below
    if not session_file_exists:
        with open(session_file, "w") as f:
            # Save it so we can keep track across multiple clients
            session_info = {"session": generate_session_id()}
            session_info.update(user)
            session_info_json = tornado.escape.json_encode(session_info)
            f.write(session_info_json)
    self.set_secure_cookie(
        "gateone_user", tornado.escape.json_encode(session_info))
def download_cfo(options):
    if options.get('loglevel', None):
        log.setLevel(options['loglevel'])

    OUT_DIR = os.path.join(CACHE_DIR, 'cfo')
    if not os.path.exists(OUT_DIR):
        mkdir_p(OUT_DIR)

    base_url = 'https://www.campaignfinanceonline.state.pa.us/pages/CFAnnualTotals.aspx'

    def _get_response_loc_pair(dl_info):
        filer_id = dl_info
        loc = os.path.join(OUT_DIR, '{}.html'.format(filer_id))
        response = requests.get(base_url, params={'Filer': filer_id})
        return (response, loc)

    filer_ids = set([])
    for loc in iglob(os.path.join(
            CACHE_DIR, 'dos', '*', '*', '[fF]iler.[Tt]xt')):
        with open(loc, 'r') as fin:
            for row in csv.reader(fin):
                if row[0]:
                    filer_ids.add(row[0])

    download_all(list(filer_ids), _get_response_loc_pair, options)
def processResults(df, features, output_path, filename):
    TeamObjects = {TeamsDict['Team_Id'][k]: {'Team_Name': TeamsDict['Team_Name'][k]}
                   for k in TeamsDict['Team_Id']}
    for season in range(2003, 2016):
        for k in TeamObjects:
            for f in features:
                TeamObjects[k][f] = 0
            TeamObjects[k]['GameCount'] = 0
        for index, game in df[df.Season == season].iterrows():
            d = game.to_dict()
            Wteam = d['Wteam']
            Lteam = d['Lteam']
            for f in features:
                if f.startswith('W'):
                    TeamObjects[Wteam][f] += d[f.replace('Avg', '')]
                if f.startswith('L'):
                    TeamObjects[Lteam][f] += d[f.replace('Avg', '')]
            TeamObjects[Wteam]['GameCount'] += 1
            TeamObjects[Lteam]['GameCount'] += 1
        for k in TeamObjects:
            for f in features:
                if TeamObjects[k]['GameCount'] > 0:
                    TeamObjects[k][f] /= TeamObjects[k]['GameCount']
        TeamStats = pandas.DataFrame.from_dict(TeamObjects, orient='index')
        mkdir_p(output_path)
        TeamStats.to_csv(output_path + filename + str(season) + '.csv')
        print('Wrote out ' + output_path + filename + str(season) + '.csv')
def user_login(self, user):
    """
    Called immediately after a user authenticates successfully.  Saves
    session information in the user's directory.  Expects *user* to be a
    string containing the username or userPrincipalName.
    e.g. '[email protected]' or just 'someuser'.
    """
    logging.debug("user_login(%s)" % user)
    # Make a directory to store this user's settings/files/logs/etc
    user_dir = os.path.join(self.settings['user_dir'], user)
    if not os.path.exists(user_dir):
        logging.info(_("Creating user directory: %s" % user_dir))
        mkdir_p(user_dir)
        os.chmod(user_dir, 0o700)
    session_file = os.path.join(user_dir, 'session')
    session_file_exists = os.path.exists(session_file)
    if session_file_exists:
        session_data = open(session_file).read()
        try:
            session_info = tornado.escape.json_decode(session_data)
        except ValueError:  # Something wrong with the file
            session_file_exists = False  # Overwrite it below
    if not session_file_exists:
        with open(session_file, 'w') as f:
            # Save it so we can keep track across multiple clients
            session_info = {
                'upn': user,  # FYI: UPN == userPrincipalName
                'session': generate_session_id()
            }
            session_info_json = tornado.escape.json_encode(session_info)
            f.write(session_info_json)
    self.set_secure_cookie(
        "gateone_user", tornado.escape.json_encode(session_info))
def backup(path, password_file=None):
    """
    Replaces the contents of a file with its decrypted counterpart, storing the
    original encrypted version and a hash of the file contents for later
    retrieval.
    """
    vault = VaultLib(get_vault_password(password_file))
    with open(path, 'r') as f:
        encrypted_data = f.read()

        # Normally we'd just try and catch the exception, but the
        # exception raised here is not very specific (just
        # `AnsibleError`), so this feels safer to avoid suppressing
        # other things that might go wrong.
        if vault.is_encrypted(encrypted_data):
            decrypted_data = vault.decrypt(encrypted_data)

            # Create atk vault files
            atk_path = os.path.join(ATK_VAULT, path)
            mkdir_p(atk_path)
            # ... encrypted
            with open(os.path.join(atk_path, 'encrypted'), 'wb') as f:
                f.write(encrypted_data)
            # ... hash
            with open(os.path.join(atk_path, 'hash'), 'wb') as f:
                f.write(hashlib.sha1(decrypted_data).hexdigest())

            # Replace encrypted file with decrypted one
            with open(path, 'wb') as f:
                f.write(decrypted_data)
def actually_create_android_project(package_name, sdk_version, java_package_name, is_library):
    path = os.path.join(os.getcwd(), package_name.lower())
    console.pretty_println("\nCreating android project ", console.bold)
    console.pretty_print(" Name : ", console.cyan)
    console.pretty_println("%s" % package_name, console.yellow)
    console.pretty_print(" Sdk Ver : ", console.cyan)
    console.pretty_println("%s" % sdk_version, console.yellow)
    console.pretty_print(" Java Name : ", console.cyan)
    console.pretty_println("%s" % java_package_name, console.yellow)
    if is_library:
        console.pretty_print(" Library : ", console.cyan)
        console.pretty_println("yes\n", console.yellow)
        cmd = ['android', 'create', 'lib-project',
               '-n', package_name,
               '-p', path,
               '-k', java_package_name,
               '-t', 'android-' + sdk_version,
               ]
    else:
        activity_name = utils.camel_case(package_name)
        console.pretty_print(" Activity : ", console.cyan)
        console.pretty_println("%s\n" % activity_name, console.yellow)
        cmd = ['android', 'create', 'project',
               '-n', package_name,
               '-p', path,
               '-k', java_package_name,
               '-t', 'android-' + sdk_version,
               '-a', activity_name]
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError:
        raise subprocess.CalledProcessError("failed to create android project.")
    # This is in the old form, let's shovel the shit around to the new form
    utils.mkdir_p(os.path.join(path, 'src', 'main', 'java'))
    os.remove(os.path.join(path, 'local.properties'))
    os.remove(os.path.join(path, 'project.properties'))
    os.remove(os.path.join(path, 'ant.properties'))
    os.remove(os.path.join(path, 'proguard-project.txt'))
    os.remove(os.path.join(path, 'build.xml'))
    os.rmdir(os.path.join(path, 'bin'))
    os.rmdir(os.path.join(path, 'libs'))
    shutil.move(os.path.join(path, 'AndroidManifest.xml'), os.path.join(path, 'src', 'main'))
    shutil.move(os.path.join(path, 'res'), os.path.join(path, 'src', 'main'))
    if not is_library:
        shutil.move(os.path.join(path, 'src', java_package_name.split('.')[0]),
                    os.path.join(path, 'src', 'main', 'java'))
def setup(self):
    """ Create a working directory and some test files """
    self.working_dir = tempfile.mkdtemp()
    self.file_contents = collections.OrderedDict.fromkeys([
        'file.test',
        '1/file.test',
        '2/2/file.test',
    ])
    self.file_timestamps = self.file_contents.copy()

    # create a key for the tests
    self.key = _create_key(None, write=False)

    # setup files in subdirectory
    for path in self.file_contents.keys():
        # create file content
        self.file_contents[path] = str(uuid.uuid4())
        abspath = os.path.join(self.working_dir, path)
        # create subdirs as necessary
        mkdir_p(os.path.dirname(abspath))
        # create test file in dir
        with open(abspath, 'w') as f:
            f.write(self.file_contents[path])
        # record file creation time
        self.file_timestamps[path] = os.stat(abspath).st_ctime
def main():
    """
    Main entry point for execution as a program (instead of as a module).
    """
    args = parse_args()
    completed_classes = []

    mkdir_p(PATH_CACHE, 0o700)
    if args.clear_cache:
        shutil.rmtree(PATH_CACHE)
    if args.on_demand:
        logging.warning(
            "--on-demand option is deprecated and is not required"
            " anymore. Do not use this option. It will be removed"
            " in the future."
        )

    for class_name in args.class_names:
        try:
            logging.info("Downloading class: %s", class_name)
            if download_class(args, class_name):
                completed_classes.append(class_name)
        except requests.exceptions.HTTPError as e:
            logging.error("HTTPError %s", e)
        except ClassNotFound as cnf:
            logging.error("Could not find class: %s", cnf)
        except AuthenticationFailed as af:
            logging.error("Could not authenticate: %s", af)

    if completed_classes:
        logging.info("Classes which appear completed: " + " ".join(completed_classes))
def make_pull_plot(category, misIDRatios, catRatios, datastring, fitname, fittype):
    pull_plot = TH1D(category, category, 6, 0, 6)
    others_plot = TH1D(category+"others", category+"others", 6, 0, 6)
    bin_names = get_all_containers(category)
    for b in range(1, len(bin_names)+1):
        pull_plot.GetXaxis().SetBinLabel(b, bin_names[b-1])
        others_plot.GetXaxis().SetBinLabel(b, bin_names[b-1])
        (value, err) = catRatios[get_bin_nr_composite(bin_names[b-1])]
        pull_plot.SetBinContent(b, value)
        pull_plot.SetBinError(b, err)
        other = get_other_component(category, bin_names[b-1])
        (valueO, errO) = misIDRatios[get_bin_nr_single(other)]
        others_plot.SetBinContent(b, valueO)
        others_plot.SetBinError(b, errO)
        #print bin_names[b-1], value, valueO
    pull_plot.Add(others_plot, -1)
    c = TCanvas("Plot", "Plot", 800, 600)
    ROOT.gStyle.SetOptStat(0)
    pull_plot.Draw()
    if len(fittype) == 0:
        fittype = "histograms"
    mydir = "pull_plots/%s/%s/%s/" % (fitname, fittype, datastring)
    mkdir_p(mydir)
    c.SaveAs("%s/%s_pulls.pdf" % (mydir, category))
    c.SaveAs("%s/%s_pulls.png" % (mydir, category))
def sweep():
    to_check = []

    bioguide = utils.flags().get('bioguide', None)
    if bioguide:
        possibles = [bioguide]
    else:
        possibles = current_bioguide.keys()

    for bioguide in possibles:
        if media_bioguide.get(bioguide, None) is None:
            to_check.append(bioguide)
        elif media_bioguide[bioguide]["social"].get(service, None) is None:
            to_check.append(bioguide)
        else:
            pass

    utils.mkdir_p("cache/social_media")
    writer = csv.writer(open("cache/social_media/%s_candidates.csv" % service, 'w'))
    writer.writerow(["bioguide", "official_full", "website", "service", "candidate"])

    for bioguide in to_check:
        candidate = candidate_for(bioguide)
        if candidate:
            url = current_bioguide[bioguide]["terms"][-1].get("url", None)
            writer.writerow([bioguide, current_bioguide[bioguide]['name']['official_full'], url, service, candidate])
            print "\tWrote: %s" % candidate
def user_login(self, user):
    """
    Called immediately after a user authenticates successfully.  Saves
    session information in the user's directory.  Expects *user* to be a
    string containing the username or userPrincipalName.
    e.g. '[email protected]' or just 'someuser'.
    """
    # Make a directory to store this user's settings/files/logs/etc
    user_dir = self.settings['user_dir'] + "/" + user
    logging.info("Creating user directory: %s" % user_dir)
    mkdir_p(user_dir)
    os.chmod(user_dir, 0700)
    session_file = user_dir + '/session'
    if os.path.exists(session_file):
        session_data = open(session_file).read()
        session_info = tornado.escape.json_decode(session_data)
    else:
        with open(session_file, 'w') as f:
            # Save it so we can keep track across multiple clients
            session_info = {
                'go_upn': user,  # FYI: UPN == userPrincipalName
                'go_session': generate_session_id()
            }
            session_info_json = tornado.escape.json_encode(session_info)
            f.write(session_info_json)
    self.set_secure_cookie("user", tornado.escape.json_encode(session_info))
def create_skeleton(self):
    """
    Create the role's directory and file structure.
    """
    utils.string_to_file(os.path.join(self.output_path, "VERSION"), "master\n")

    for folder in c.ANSIBLE_FOLDERS:
        create_folder_path = os.path.join(self.output_path, folder)
        utils.mkdir_p(create_folder_path)

        mainyml_template = default_mainyml_template.replace(
            "%role_name", self.role_name)
        mainyml_template = mainyml_template.replace(
            "%values", folder)

        out_path = os.path.join(create_folder_path, "main.yml")

        if folder not in ("templates", "meta", "tests", "files"):
            utils.string_to_file(out_path, mainyml_template)

        if folder == "meta":
            utils.create_meta_main(out_path, self.config, self.role_name,
                                   self.options.galaxy_categories)
def main():
    """
    Main entry point for execution as a program (instead of as a module).
    """
    args = parseArgs()
    completed_classes = []

    mkdir_p(PATH_CACHE, 0o700)
    if args.clear_cache:
        shutil.rmtree(PATH_CACHE)

    for class_name in args.class_names:
        try:
            logging.info('Downloading class: %s', class_name)
            if download_class(args, class_name):
                completed_classes.append(class_name)
        except requests.exceptions.HTTPError as e:
            logging.error('HTTPError %s', e)
        except ClassNotFound as cnf:
            logging.error('Could not find class: %s', cnf)
        except AuthenticationFailed as af:
            logging.error('Could not authenticate: %s', af)

    if completed_classes:
        logging.info(
            "Classes which appear completed: " + " ".join(completed_classes))
def download_syllabus_icourse163(session, leclist, path='', overwrite=False):
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Host': 'v.stu.126.net',  #*
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'X-Requested-With': 'ShockwaveFlash/15.0.0.239',
    }
    session.headers.update(headers)

    retry_list = []
    for week in leclist:
        cur_week = week[0]
        lessons = week[1]
        for lesson in lessons:
            cur_lesson = lesson[0]
            lectures = lesson[1]
            cur_week = clean_filename(cur_week)
            cur_lesson = clean_filename(cur_lesson)
            dir = os.path.join(path, cur_week, cur_lesson)
            if not os.path.exists(dir):
                mkdir_p(dir)
            for (lecnum, (lecture_url, lecture_name)) in enumerate(lectures):
                lecture_name = clean_filename(lecture_name)
                filename = os.path.join(dir, "%02d_%s.%s" % (lecnum+1, lecture_name, lecture_url[-3:]))
                print(filename)
                print(lecture_url)
                try:
                    resume_download_file(session, lecture_url, filename, overwrite)
                except Exception as e:
                    print(e)
                    print('Error, add it to retry list')
                    retry_list.append((lecture_url, filename))

    retry_times = 0
    while len(retry_list) != 0 and retry_times < 3:
        print('%d items should be retried, retrying...' % len(retry_list))
        tmp_list = [item for item in retry_list]
        retry_times += 1
        for (url, filename) in tmp_list:
            try:
                print(url)
                print(filename)
                resume_download_file(session, url, filename, overwrite)
            except Exception as e:
                print(e)
                print('Error, add it to retry list')
                continue
            retry_list.remove((url, filename))

    if len(retry_list) != 0:
        print('%d items failed, please check it' % len(retry_list))
    else:
        print('All done.')
def read_cache(self):
    """
    Read cache object inside the .bookbuilder/cache_object.txt.

    Returns:
        None if cache_object.txt doesn't exist
        cache_object of type dict if it does
    """
    # check whether .bookbuilder folder exists
    # and initialise it if it doesn't
    if not os.path.exists('.bookbuilder'):
        print("Creating .bookbuilder folder")
        mkdir_p('.bookbuilder')

    cache_object_path = os.path.join('.bookbuilder', 'cache_object.txt')

    if not os.path.exists(cache_object_path):
        # create one if it doesn't exist
        cache_object = self.create_cache_object()

        return cache_object
    else:
        with open(cache_object_path, 'r') as cop:
            copcontent = cop.read()
            if len(copcontent) == 0:
                cache_object = self.create_cache_object()
            else:
                cache_object = ast.literal_eval(copcontent)

    return cache_object
def anonymize_existing(bids_warehouse, anonmap, bids_log):
    '''
    Name: anonymize_existing
    Description: This function will anonymize BIDSified data in the non-anonymized directory.

    Arguments:
    ================================================================================================
    bids_warehouse : string
        A full path to the BIDS warehouse.
    anonmap : dict (optional)
        A dictionary mapping URSIs to anonymous IDs. Used if anonymization is to occur.
        URSIs are keys, anonymous IDs are values.
    bids_log : logger
        A logger for the image to BIDS conversion.
    '''
    nonanon_dir = os.path.join(bids_warehouse, 'Non-anonymized')
    anon_dir = os.path.join(bids_warehouse, 'Anonymized')
    for nonanon_root, dirnames, filenames in os.walk(nonanon_dir):
        for filename in filenames:
            participants_tsv = False
            nonanon_file = os.path.join(nonanon_root, filename)
            ursi = re.findall('M[0-9]{8}', nonanon_file)
            if ursi:
                ursi = ursi[0]
            elif 'participants.tsv' in nonanon_file:
                participants_tsv = True
                anon_file = os.path.join(anon_dir, 'participants.tsv')
            else:
                bids_log.info('Could not find URSI in file %s. (Probably an inherited JSON)' % nonanon_file)
                continue
            if not participants_tsv:
                if ursi not in anonmap.keys():
                    bids_log.info('URSI %s not in anonymization map. Skipping...' % ursi)
                    continue
                anon_root = nonanon_root.replace(ursi, anonmap[ursi])
                anon_root = anon_root.replace(nonanon_dir, anon_dir)
                anon_file = nonanon_file.replace(ursi, anonmap[ursi])
                anon_file = anon_file.replace(nonanon_dir, anon_dir)
                mkdir_p(anon_root)
            if not os.path.isfile(anon_file):
                if '.nii.gz' in nonanon_file:
                    try:
                        shutil.copy(nonanon_file, anon_file)
                    except:
                        bids_log.info('Could not copy %s' % nonanon_file)
                else:
                    try:
                        with open(nonanon_file, 'rU') as nonanon_f:
                            with open(anon_file, 'w') as anon_f:
                                for line in nonanon_f:
                                    ursi = re.findall('M[0-9]{8}', line)
                                    if ursi:
                                        ursi = ursi[0]
                                        if ursi in anonmap.keys():
                                            anon_f.write(line.replace(ursi, anonmap[ursi]))
                                    else:
                                        anon_f.write(line)
                    except:
                        bids_log.info('Could not copy %s' % nonanon_file)
            else:
                bids_log.info('%s is already anonymized' % nonanon_file)
def extract_cfo(options):
    if options.get('loglevel', None):
        log.setLevel(options['loglevel'])

    OUT_DIR = os.path.join(ORIG_DIR, 'cfo')
    if not os.path.exists(OUT_DIR):
        mkdir_p(OUT_DIR)

    CFO_CACHE = os.path.join(CACHE_DIR, 'cfo')

    html_parser = etree.HTMLParser()

    def _chunks(l, n):
        """ Yield successive n-sized chunks from l. """
        for i in xrange(0, len(l), n):
            yield l[i:i+n]

    def _parse_data_tables(d):
        return pd.io.html.read_html(etree.tostring(d), header=1, index_col=0)

    def _parse_year_table(y):
        return y.xpath('.//tr[1]/td[2]/span')[0].text

    def _extract_tables(pg_html):
        all_tables_container = pg_html.xpath(
            "//div[@id='ctl00_ContentPlaceHolder1_divCFSummary']")[0]
        summary_tables = {_parse_year_table(y): _parse_data_tables(d)[0]
                          for y, d in _chunks(all_tables_container.xpath("table"), 2)}
        return summary_tables

    for loc in iglob(os.path.join(CFO_CACHE, '*.html')):
        log.debug('opening {l}'.format(l=loc))
        filer_id = os.path.splitext(os.path.split(loc)[1])[0]
        with open(loc, 'r') as fin:
            try:
                pg_html = etree.parse(fin, parser=html_parser)
                tables = _extract_tables(pg_html)
            except Exception as e:
                log.error('parsing file {l} failed:'.format(l=loc))
                log.error(e)
            try:
                for year, table in tables.iteritems():
                    if year:
                        output_dir = os.path.join(OUT_DIR, year)
                        if not os.path.exists(output_dir):
                            mkdir_p(output_dir)
                        output_loc = os.path.join(OUT_DIR, year,
                                                  '{}.json'.format(filer_id))
                        table.dropna(axis=1, how='all').to_json(
                            path_or_buf=output_loc, orient='index')
                    else:
                        log.debug('{l} contained {y} as a year?'.format(
                            l=loc, y=year))
            except Exception as e:
                log.error('reading table dict {l} failed:'.format(l=loc))
                log.error(e)
def _get_response_loc_pair(dl_info):
    year, period, filename, filesize, atag_url = dl_info
    loc_dir = os.path.join(OUT_DIR, year, period)
    if not os.path.exists(loc_dir):
        mkdir_p(loc_dir)
    loc = os.path.join(loc_dir, filename)
    response = requests.get(atag_url, stream=True)
    return (response, loc)
def safe_writeScreenshotDescriptionFile(self, out_fname):
    """
    writes screenshot descr file in a safe mode. any problems are reported via warning

    :param out_fname: {str}
    :return: None
    """
    mkdir_p(dirname(out_fname))
    self.writeScreenshotDescriptionFile(out_fname)
def create_rosjava_project_common(args, template_directory):
    project_name = args.name[0]
    console.pretty_println("\nCreating rosjava project ", console.bold)
    console.pretty_print(" Name : ", console.cyan)
    console.pretty_println("%s" % project_name, console.yellow)
    utils.mkdir_p(os.path.join(os.getcwd(), project_name.lower()))
    # This is in the old form, let's shovel the shit around to the new form
    create_gradle_package_files(args, template_directory)
    add_to_root_gradle_settings(args.name[0])