def train_word2vec(self, input_file=None, size=128, augmentation=True):
    self.size = size
    save_emb_dir = os.path.join(self.save_dir, 'embedding')
    if not check_path(save_emb_dir):
        os.makedirs(save_emb_dir)
    save_emb_file = os.path.join(save_emb_dir, 'node_w2v_%d' % size)
    if check_path(save_emb_file):
        print("pretrained embedding already exists")
        return
    if input_file is None or not check_path(input_file):
        input_file = os.path.join(self.save_dir, 'train_ast.pkl')
    trees = pd.read_pickle(input_file)
    print("Original sample number:", len(trees))
    if augmentation:
        cols = ['pre', 'post', 'level']
    else:
        cols = ['pre']
    corpus = []
    for c in cols:
        corpus += trees[c].tolist()
    print("Sample number to train Word2Vec model:", len(corpus))
    from gensim.models.word2vec import Word2Vec
    w2v = Word2Vec(corpus, size=size, workers=16, sg=1, min_count=3)
    w2v.save(save_emb_file)

def run(experiment):
    # Check whether all required paths exist
    for key in experiment.config.keys():
        if key.endswith("_dir"):
            utils.check_path(experiment.config[key])
    preprocess.preprocess(experiment)
    results = process.train_and_evaluate(experiment)

def save_filter(self, remark: str) -> None:
    filter_path = self.exp_path + 'filters/'
    check_path(filter_path)
    filter_params = np.array([
        self.obs_filter.mean, self.obs_filter.square_sum,
        self.obs_filter.count
    ])
    np.save(filter_path + remark, filter_params)
    print(f"-------Filter params saved to {filter_path}-------")

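# A hedged companion sketch (not part of the original source): one way the
# array saved by save_filter above could be restored. The attribute layout
# (mean, square_sum, count) mirrors the snippet above; the function name and
# the allow_pickle choice are assumptions for illustration only.
def load_filter_params(filter_path, remark):
    import numpy as np
    params = np.load(filter_path + remark + '.npy', allow_pickle=True)
    mean, square_sum, count = params
    return mean, square_sum, count
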
def check_squid_files(self):
    """Validates squid config, cache directory and ownership"""
    print "Checking Squid Files......",
    VAR_MAP = {'match_squid_conf': False,
               'match_squid_cache': False,
               'match_squid_ownership': False,
               }

    conf_err_msg = health_check_utils.check_path(self.NAME,
                                                 "/etc/squid/squid.conf")
    if not conf_err_msg == "":
        self._set_status(0, conf_err_msg)
    elif int(oct(os.stat('/etc/squid/squid.conf').st_mode)) < 644:
        self._set_status(
            0,
            "[%s]Error: squid.conf has incorrect "
            "file permissions" % self.NAME)
    else:
        VAR_MAP['match_squid_conf'] = True

    squid_path_err_msg = health_check_utils.check_path(self.NAME,
                                                       '/var/squid/')
    if not squid_path_err_msg == "":
        self._set_status(0, squid_path_err_msg)
    elif health_check_utils.check_path(self.NAME, '/var/squid/cache') != "":
        self._set_status(
            0,
            health_check_utils.check_path(self.NAME, '/var/squid/cache'))
    else:
        VAR_MAP['match_squid_cache'] = True
        uid = os.stat('/var/squid/').st_uid
        gid = os.stat('/var/squid/').st_gid
        if uid != gid or pwd.getpwuid(23).pw_name != 'squid':
            self._set_status(
                0,
                "[%s]Error: /var/squid directory ownership "
                "misconfigured" % self.NAME)
        else:
            VAR_MAP['match_squid_ownership'] = True

    failed = []
    for key in VAR_MAP.keys():
        if VAR_MAP[key] is False:
            failed.append(key)
    if len(failed) != 0:
        self.messages.append(
            "[%s]Info: Failed components for squid config: %s"
            % (self.NAME, ', '.join(item for item in failed)))
    return True

def save_to_file(self):
    """Save options to settings file. """
    check_path(self.config_path)
    #print("cfg: %s", self.settings_file)
    with open(self.settings_file, 'w', encoding="utf8") as settings_file:
        options = self._get_options()
        #print("options: %s" % options)
        json.dump(options,
                  settings_file,
                  indent=4,
                  separators=(',', ': '))

def img_watermark(in_video, out_video):
    check_path(out_video)
    img_path = "./logo.png"
    args = [
        'ffmpeg', '-i', in_video, '-i', img_path, '-filter_complex',
        'overlay=10:main_h-overlay_h-10', out_video
    ]
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode != 0:
        raise Exception('ffmpeg', out, err)
    return out.decode('utf-8')

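# Hedged usage sketch (file names are assumptions, not from the original
# source): requires ffmpeg on PATH, a ./logo.png next to the script, and a
# check_path helper like the ones used throughout these snippets.
if __name__ == '__main__':
    ffmpeg_output = img_watermark('input.mp4', 'output_watermarked.mp4')
    print(ffmpeg_output)
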
def trans_mapping(uniref90map, p_trans_map):
    say("Loading transitive mapping file:\n ", p_trans_map)
    check_path(p_trans_map)
    overrides = {}
    uniref90map_r = {}
    for header, uniref90 in uniref90map.items():
        uniref90map_r.setdefault(uniref90, set()).add(header)
    with try_open(p_trans_map) as fh:
        for row in csv.reader(fh, csv.excel_tab):
            uniref90, uniref50 = row
            headers = uniref90map_r.get(uniref90, set())
            for h in headers:
                overrides[h] = uniref50
    return overrides

def parse_results(results):
    say("Parsing results file:\n ", results)
    check_path(results)
    mapping = {}
    mode = get_mode(results)
    min_pident = float(mode.replace("uniref", ""))
    with try_open(results) as fh:
        for row in csv.reader(fh, csv.excel_tab):
            h = Hit(row, config=c_output_format)
            if h.qseqid not in mapping:
                if h.pident >= min_pident and h.mcov >= c_min_coverage:
                    uniref = h.sseqid.split("|")[0]
                    mapping[h.qseqid] = uniref
    return mapping

def split_by_time(in_video, out_video):
    check_path(out_video)
    start = '00:00:02'
    l = '3'
    # args = ['ffmpeg', '-i', in_video, '-c', 'copy', '-ss', start, out_video]
    args = [
        'ffmpeg', '-i', in_video, '-vcodec', 'copy', '-acodec', 'copy', '-ss',
        start, '-t', l, '-sn', out_video
    ]
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode != 0:
        raise Exception('ffmpeg', out, err)
    print(out.decode('utf-8'))

def parse_spm5_preproc(work_dir, step):
    doc = {}
    if hasattr(step, 'spatial') and hasattr(step.spatial, 'realign'):
        realign = step.spatial.realign.estwrite
        motion = []
        if len(realign.data[0].shape) == 0:
            realign = [realign]
        else:
            realign = realign.data
        for session in realign:
            data_dir = find_data_dir(work_dir, check_path(session[0]))
            motion.append(glob.glob(os.path.join(data_dir, 'rp_*.txt'))[0])
        doc['motion'] = motion
    if hasattr(step, 'spatial') and isinstance(step.spatial, np.ndarray):
        doc['anatomy'] = makeup_path(
            work_dir, check_path(step.spatial[0].preproc.data))
        doc['wmanatomy'] = prefix_filename(
            makeup_path(
                work_dir,
                check_path(step.spatial[1].normalise.write.subj.resample)),
            'w')
    if hasattr(step, 'temporal'):
        doc['n_slices'] = int(step.temporal.st.nslices)
        doc['ref_slice'] = int(step.temporal.st.refslice)
        doc['slice_order'] = step.temporal.st.so.tolist()
        doc['ta'] = float(step.temporal.st.ta)
        doc['tr'] = float(step.temporal.st.tr)
        doc['bold'] = []
        doc['swabold'] = []
        if len(step.temporal.st.scans[0].shape) == 0:
            bold = [step.temporal.st.scans]
        else:
            bold = step.temporal.st.scans
        for session in bold:
            data_dir = find_data_dir(work_dir, str(session[0]))
            doc['bold'].append(
                check_paths([
                    os.path.join(data_dir, os.path.split(str(x))[1])
                    for x in session
                ]))
            doc['swabold'].append(
                check_paths([
                    prefix_filename(
                        os.path.join(data_dir, os.path.split(str(x))[1]),
                        'swa') for x in session
                ]))
        doc['n_scans'] = [len(s) for s in doc['bold']]
    return doc

def check_snmp_mibs(self):
    """Checks if SNMP MIB files are properly placed"""
    print "Checking SNMP MIBs......",
    conf_err_msg = health_check_utils.check_path(self.NAME,
                                                 '/etc/snmp/snmp.conf')
    if not conf_err_msg == "":
        self._set_status(0, conf_err_msg)

    mibs_err_msg = health_check_utils.check_path(
        self.NAME,
        '/usr/local/share/snmp/mibs')
    if not mibs_err_msg == "":
        self._set_status(0, mibs_err_msg)
    return True

def build_seq_index(self, set_type, augmentation=True):
    save_path = os.path.join(self.save_dir, 'embedding', '%s.pkl' % set_type)
    if check_path(save_path):
        print("id sequence file already exists")
        return
    from gensim.models.word2vec import Word2Vec
    word2vec = Word2Vec.load(
        os.path.join(self.save_dir, 'embedding',
                     'node_w2v_%d' % self.size)).wv
    vocab = word2vec.vocab
    max_token = word2vec.vectors.shape[0]

    def tree_to_index(seq_list):
        result = [
            vocab[w].index if w in vocab else max_token for w in seq_list
        ]
        return result

    if augmentation:
        cols = ['pre', 'post', 'level']
    else:
        cols = ['pre']
    fpath = os.path.join(self.save_dir, '%s_ast.pkl' % set_type)
    trees = pd.read_pickle(fpath)
    for c in cols:
        trees[c] = trees[c].apply(tree_to_index)
    cols = cols + ['label']
    trees[cols].to_pickle(save_path)
    seq_len = trees['pre'].apply(len)
    print("min/max sequence length: ", seq_len.min(), seq_len.max())

def __init__(self, train_file, valid_file, test_file):
    print("Beginning to preprocess data...")
    self.set_types = ['train', 'valid', 'test']
    self.files = [train_file, valid_file, test_file]
    for f in self.files:
        assert check_path(f), "%s does not exist" % f
    self.save_dir = os.path.dirname(train_file)

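# Hedged driver sketch (the class name Preprocessor and the file paths are
# assumptions, not from the original source): shows how this constructor and
# the related methods in these snippets (_c2ast, train_word2vec,
# build_seq_index) could be chained into one preprocessing pass.
prep = Preprocessor('data/train.pkl', 'data/valid.pkl', 'data/test.pkl')
for fpath, set_type in zip(prep.files, prep.set_types):
    prep._c2ast(fpath, set_type)  # parse C code into AST traversal sequences
prep.train_word2vec(size=128, augmentation=True)
for set_type in prep.set_types:
    prep.build_seq_index(set_type, augmentation=True)
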
def __init__(self, parameters):
    if not isinstance(parameters, dict):
        raise TypeError('"parameters" must be a dict type')
    if 'path_data' not in parameters.keys():
        raise KeyError(
            '"path_data" is not in "parameters", "path_data" is a necessary parameter'
        )
    if not check_path(parameters['path_data']):
        raise ValueError(f"'{parameters['path_data']}' does not exist")
    if not parameters['target']:
        raise KeyError(
            '"target" is not in "parameters", "target" is a necessary parameter'
        )
    self.path = parameters['path_data']
    self.sep = parameters['sep']
    self.target = parameters['target']
    self.id = parameters['id']
    self.type = parameters['type']
    self.num_features = parameters['num_features']
    self.threshold = parameters['threshold']
    self.n_jobs = parameters['n_jobs']
    self.output_file_name = parameters['output_file_name']
    self.df = None
    logging.info(f"Object {self} is created")

def parse_spm8_preproc(work_dir, step):
    doc = {}
    if hasattr(step, 'spatial') and hasattr(step.spatial, 'preproc'):
        doc['anatomy'] = makeup_path(
            work_dir, check_path(step.spatial.preproc.data))
        doc['wmanatomy'] = prefix_filename(doc['anatomy'], 'wm')
    if hasattr(step, 'temporal'):
        doc['n_slices'] = int(step.temporal.st.nslices)
        doc['ref_slice'] = int(step.temporal.st.refslice)
        doc['slice_order'] = step.temporal.st.so.tolist()
        doc['ta'] = float(step.temporal.st.ta)
        doc['tr'] = float(step.temporal.st.tr)
        doc['bold'] = []
        doc['swabold'] = []
        if len(step.temporal.st.scans[0].shape) == 0:
            bold = [step.temporal.st.scans]
        else:
            bold = step.temporal.st.scans
        for session in bold:
            data_dir = find_data_dir(work_dir, str(session[0]))
            doc['bold'].append(check_paths(
                [os.path.join(data_dir, os.path.split(str(x))[1])
                 for x in session]))
            doc['swabold'].append(check_paths(
                [prefix_filename(
                    os.path.join(data_dir, os.path.split(str(x))[1]), 'swa')
                 for x in session]))
        doc['n_scans'] = [len(s) for s in doc['bold']]
    return doc

def save_plot(self):
    # add options to change file format, background color/transparency,
    # resolution, padding, etc.
    file_prefix = os.path.join('Plots', time.strftime('%Y-%m-%d'),
                               time.strftime('%Z.%H.%M.%S'))
    plot_file = file_prefix + '.eps'
    utils.check_path(plot_file)
    try:
        # bug in matplotlib 1.4.0 prevents this from working
        # (https://github.com/matplotlib/matplotlib/pull/3434)
        plt.savefig(plot_file, format='eps', bbox_inches='tight')
    except:
        plt.savefig(plot_file, format='eps')
    print '\nPlot saved as ' + plot_file
    plot_log_file = file_prefix + '.log'
    self.save_log(filename=plot_log_file)

def __init__(self,
             facenet_model_dir,
             mtcnn_model_dir,
             resize_factor=0.7,
             match_thresh=0.7,
             mtcnn_params=MTCNN_PARAMS_DEFAULT,
             db_load_path=None,
             database_verbose=False,
             gpu_memory_fraction=0.4):
    '''
    Arguments:
        facenet_model_dir: Directory containing the FaceNet model,
            with meta-file and ckpt-file
        mtcnn_model_dir: Directory containing the MTCNN model,
            det1.npy, det2.npy, det3.npy
        resize_factor: input image will be resized to a smaller size
            before being sent into MTCNN and FaceNet
        match_thresh: the lower bound of a valid match (if the distance
            between 2 embeddings is higher than this threshold, the
            corresponding 2 faces are viewed as different)
        mtcnn_params: Parameters of MTCNN, a dictionary with 3 keys:
            'minsize', 'threshold', 'factor'
        db_load_path: if set, load an existing Face database
        database_verbose: if True, print extra messages as the database
            updates
        gpu_memory_fraction: Fraction of GPU memory to be queried,
            in range [0, 1]
    Return:
        an instance of FaceRecognizer
    '''
    self._facenet_model_dir = os.path.abspath(
        os.path.expanduser(facenet_model_dir))
    self._mtcnn_model_dir = os.path.abspath(
        os.path.expanduser(mtcnn_model_dir))
    self._minsize = mtcnn_params['minsize']
    self._threshold = mtcnn_params['threshold']
    self._factor = mtcnn_params['factor']
    self._match_thresh = match_thresh
    self._resize_factor = resize_factor
    # define graph to be built on
    self._graph = tf.Graph()
    # define session
    sess_conf = tf.ConfigProto()
    sess_conf.gpu_options.allow_growth = True
    sess_conf.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    self._sess = tf.Session(graph=self._graph, config=sess_conf)
    # I/O of FaceNet
    self._images_placeholder = []
    self._embeddings = []
    # database
    if db_load_path is not None:
        # load existing Face database
        db_load_path = utils.check_path(db_load_path)
        with open(db_load_path, 'rb') as handle:
            self._database = pickle.load(handle)
        print('Load FaceDatabase from {}'.format(db_load_path))
        self._database._verbose = database_verbose
    else:
        # create new Face database
        self._database = FaceDatabase(database_verbose)
    # MTCNN
    self._pnet = []
    self._rnet = []
    self._onet = []
    # embeddings and corresponding names obtained from current inference
    self._cur_embs = []
    self._cur_embs_name = []

def getEdgeFeatureCSV(years, resource):
    gs = {}
    global countries
    cList = []
    As = []
    for y in years:
        gs[y] = get_graph(y, resource)
        e = get_graph(y, 'essex')
        cList.append(set(gs[y].nodes()))
        cList.append(set(e.nodes()))
        #print 'For the year'
        #pprint(gs[y].nodes())
        #pprint(e.nodes())
    #pprint(cList)
    countries = list(set.intersection(*cList))
    print 'Countries included so far:'
    print countries
    edgefeatureYears = years[:-1]
    nodefeatureYear = years[-2]
    nodefeatureYears = [nodefeatureYear]
    nodefeatures = node_feature_extraction(nodefeatureYears, nodefeatureDict,
                                           resource)
    edgefeatures = edge_feature_extraction(edgefeatureYears, countries,
                                           edgefeatureDict, resource)
    nodeToEdgeFeatures = convertNodalFeaturesToEdgeFeatures(
        countries, nodefeatureYears, nodefeatures)
    edgefeatures.update(nodeToEdgeFeatures)
    features = edgefeatures.keys()
    check_path('data/raw/features/' + resource[0] + '/edgedata/')
    filename = open('data/raw/features/' + resource[0] + '/edgedata/' +
                    str(years[-1]) + '.csv', 'wb')
    writer = csv.writer(filename)
    fnames = copy.deepcopy(features)
    fnames.append("t")
    fnames.insert(0, "edge")
    writer.writerow(fnames)
    for c1 in countries:
        for c2 in countries:
            row = [c1 + "_" + c2, ]
            for f in features:
                row.append(edgefeatures[f][(c1, c2)])
            try:
                t = gs[years[-1]][c1][c2]["weight"]
            except:
                t = 0
            row.append(t)
            writer.writerow(row)

def __init__(self, parameters):
    if not isinstance(parameters, dict):
        raise TypeError('"parameters" must be a dict type')
    if 'path_data' not in parameters.keys():
        raise KeyError(
            '"path_data" is not in "parameters", "path_data" is a necessary parameter'
        )
    if not check_path(parameters['path_data']) or not check_dir(
            parameters['path_data']):
        raise ValueError(
            f"'{parameters['path_data']}' does not exist or is not a folder")
    if not check_csv(parameters['path_data']):
        raise ValueError(
            f"'{parameters['path_data']}' must contain only csv files")
    all_files_found, list_not_found_files = check_csv_files(
        parameters['path_data'], parameters['tables'])
    if not all_files_found:
        raise ValueError(
            f"{list_not_found_files} not found in {parameters['path_data']}")
    self.path = parameters['path_data']
    self.tables = parameters['tables']
    self.main_table = parameters['main_table']
    self.depth = parameters['depth']
    self.relations = parameters['relations']
    self.sep = parameters['sep']
    self.n_jobs = min(os.cpu_count(), parameters['n_jobs'])
    self.chunk_size = parameters['chunk_size']
    self.max_features = parameters['max_features']
    self.target = parameters['target']
    self.paths_files = [
        os.path.join(self.path, table) for table, _ in self.tables.items()
    ]
    self.drop_contains = [col for _, col in self.tables.items()
                          if col] + [self.target + ')']
    self.dict_dataframes = dict()
    self.entities = None
    self.agg_primitives = parameters['agg_primitives']
    self.trans_primitives = parameters['trans_primitives']
    self.feature_matrix = None
    self.feature_names = None
    self.output_file_name = parameters['output_file_name']
    self.time_indecies = parameters['time_indecies']
    self.time_variables = parameters[
        'time_variables']  # TODO: replace with processing from Anya's file
    self.feature_names_file = parameters['file_name_for_features']
    self.generate_selected_features = parameters[
        'generate_selected_features']
    self.file_name_with_selected_features = parameters[
        'file_with_selected_features']
    self.selected_features = list()
    logging.info(f"Object {self} is created")

def proc_func(csv_path):
    start_time = time.time()
    print(csv_path + ' has begun ...')
    ckpt_path = ckpt_folder + csv_path + '.ckpt'
    check_path(ckpt_path, binary=True)
    ckpt_index = read_ckpt(ckpt_path)
    csv_file = csv.reader(open(csv_folder + csv_path + '.csv'))
    _ = next(csv_file)
    rows = [row for row in csv_file]
    print('start from checkpoint ' + str(ckpt_index + 1) + ' in ' +
          str(csv_path))
    for i in tqdm(range(ckpt_index + 1, len(rows))):
        process(rows[i][0], rows[i][1])
        save_ckpt(i, ckpt_path)
    print(csv_path + ' has been done in ' + str(time.time() - start_time))

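# Hedged sketch of the checkpoint helpers referenced above (read_ckpt and
# save_ckpt are not defined in this snippet). These minimal versions are
# assumptions chosen to match how proc_func uses them: the file stores the
# index of the last processed row, and -1 means "start from the beginning".
def read_ckpt(ckpt_path):
    try:
        with open(ckpt_path) as fh:
            return int(fh.read().strip())
    except (FileNotFoundError, ValueError):
        return -1

def save_ckpt(index, ckpt_path):
    with open(ckpt_path, 'w') as fh:
        fh.write(str(index))
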
def init_log(args):
    import time
    current_time = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime())
    # Creating log file
    save_path = './mnist/'
    check_path(save_path)
    file = save_path + 'q_' + args.hiddens + '_n_' + str(args.num_masks) + \
        '_lr_' + str(args.learning_rate) + '_wd_' + str(args.weight_decay) + \
        '_b_' + str(args.batch_size) + '_' + current_time
    logging.basicConfig(
        filename=file + '.log',
        filemode='w',
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    return logger, file

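# Hedged usage sketch (argument values are assumptions, not from the original
# source): init_log only needs an object exposing the fields it reads
# (hiddens, num_masks, learning_rate, weight_decay, batch_size).
from argparse import Namespace

args = Namespace(hiddens='500-500', num_masks=1, learning_rate=1e-3,
                 weight_decay=1e-4, batch_size=100)
logger, log_prefix = init_log(args)
logger.info('logging to %s.log', log_prefix)
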
def run(self):
    source_file = self.LATEST_YOUTUBE_DL + YOUTUBEDL_BIN
    destination_file = os.path.join(self.download_path, YOUTUBEDL_BIN)
    self._talk_to_gui('download', destination_file)
    check_path(self.download_path)
    try:
        stream = request.urlopen(source_file, timeout=self.DOWNLOAD_TIMEOUT)
        with open(destination_file, 'wb') as dest_file:
            dest_file.write(stream.read())
        self._talk_to_gui('correct')
    except (HTTPError, URLError, IOError) as error:
        self._talk_to_gui('error', error)
    if not self.quiet:
        self._talk_to_gui('finish')

def text_watermark(in_video, out_video):
    check_path(out_video)
    text = "测试中文水印"
    x = 10
    y = 10
    fontf = "./hysjsJ.ttf"
    fontsize = 12
    fcolor = "white"
    sc = 'black'
    shadow_x = 5
    shadow_y = 5
    custom = ("drawtext=text={text}:x=100:y=H-th-100:fontfile={fontf}:"
              "fontsize=38:fontcolor=red:shadowcolor=black:shadowx=5:shadowy=5 ")
    custom = custom.format(text=text, fontf=fontf)
    args = ['ffmpeg', '-i', in_video, '-vf', custom, out_video]
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode != 0:
        raise Exception('ffmpeg', out, err)
    return out.decode('utf-8')

def _write(self, data, mode):
    """Write data to the log file.

    That's the main method for writing to the log file.

    Args:
        data (string): String to write on the log file.
        mode (string): Can be any IO mode supported by python.

    """
    check_path(self.config_path)
    with open(self.log_file, mode) as log:
        if mode == 'a' and self.add_time:
            msg = self.TIME_TEMPLATE.format(time=strftime('%c'),
                                            error_msg=data)
        else:
            msg = data
        log.write(msg)

def check_rsyslogd(self):
    """Validates rsyslogd configuration and service"""
    print "Checking rsyslog......",
    conf_err_msg = health_check_utils.check_path(self.NAME,
                                                 '/etc/rsyslog.conf')
    if not conf_err_msg == "":
        self._set_status(0, conf_err_msg)

    dir_err_msg = health_check_utils.check_path(self.NAME,
                                                '/etc/rsyslog.d/')
    if not dir_err_msg == "":
        self._set_status(0, dir_err_msg)

    serv_err_msg = health_check_utils.check_service_running(self.NAME,
                                                            'rsyslogd')
    if not serv_err_msg == "":
        self._set_status(0, serv_err_msg)
    return True

def uniref_search(diamond=None, database=None, query=None, seqtype=None,
                  temp=None, diamond_options=None, force_search=False):
    if which(diamond) is None:
        die("<diamond> is not executable as: {}".format(diamond))
    for path in [database, query, temp]:
        check_path(path)
    binary = {"nuc": "blastx", "prot": "blastp"}[seqtype]
    mode = get_mode(database)
    results = os.path.split(query)[1]
    results = os.path.join(temp, results)
    results = ".".join([results, mode, "hits"])
    command = [
        diamond,
        binary,
        "--db", database,
        "--query", query,
        "--outfmt", c_output_format,
        "--tmpdir", temp,
        "--out", results,
        "--id", get_mode(results).replace("uniref", ""),
        c_diamond_filters,
    ]
    command = " ".join([str(k) for k in command])
    command += (" " + diamond_options) if diamond_options is not None else ""
    if force_search or not os.path.exists(results):
        say("Executing:\n ", command)
        os.system(command)
    else:
        say("Using existing results file:\n ", results)
    return results

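# Hedged usage sketch (paths, database name, and options are assumptions,
# not from the original source): run a protein query against a UniRef90
# DIAMOND database; get_mode is assumed to infer "uniref90" from the
# database/results file names.
hits_file = uniref_search(
    diamond="diamond",
    database="uniref90.dmnd",
    query="proteins.faa",
    seqtype="prot",
    temp="/tmp/uniref_search",
    diamond_options="--threads 8",
)
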
def get_intra_images(mat_file, work_dir, memory=Memory(None)):
    mat = memory.cache(load_matfile)(mat_file)['SPM']
    images = {}
    images['beta_maps'] = []
    images['c_maps'] = {}
    images['t_maps'] = {}
    images['contrasts'] = {}
    for c in mat.xCon:
        name = safe_name(str(c.name))
        try:
            images['c_maps'][name] = check_path(
                os.path.join(work_dir, str(c.Vcon.fname)))
            images['t_maps'][name] = check_path(
                os.path.join(work_dir, str(c.Vspm.fname)))
            images['contrasts'][name] = c.c.tolist()
        except:
            pass  # sometimes c.Vcon is an empty array
    for i, b in enumerate(mat.Vbeta):
        images['beta_maps'].append(
            check_path(os.path.join(work_dir, str(b.fname))))
    return images

def test_windows_target(args):
    utils.check_path(os.path.join(os.environ["QTKITS"], "msvc2017_64"))

    msvc_env = R"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
    utils.check_path(msvc_env)

    qmake_path = os.path.join(os.environ["QTKITS"], "msvc2017_64", "bin",
                              "qmake.exe")
    utils.check_path(qmake_path)

def plot_tsne(embeddings, vocab, output_path, format='png', random_state=0,
              normalize=False):
    """
    embeddings: numpy.array of shape (num_concepts, emb_dim)
    vocab: list[str]
    output_path: str
    format: str (optional, default 'png')
    random_state: int (optional, default 0)
    """
    assert len(embeddings) == len(vocab)
    concept2id = {w: i for i, w in enumerate(vocab)}
    all_ids = []
    offsets = [0]
    for cate in ['animals', 'vehicles', 'grammar', 'verb']:
        cate_ids = [concept2id[s] for s in categories[cate] if s in concept2id]
        all_ids += cate_ids
        offsets.append(offsets[-1] + len(cate_ids))
        oov_rate = 1 - len(cate_ids) / len(categories[cate])
        print('[tsne] {} oov rate = {}'.format(cate, oov_rate))
    original_x = embeddings[np.array(all_ids)]
    if normalize:
        original_x = original_x / np.sqrt(
            (original_x**2).sum(1))[:, np.newaxis]
    reduced_x = TSNE(2, random_state=random_state).fit_transform(original_x)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i, cate in enumerate(['animals', 'vehicles', 'grammar', 'verb']):
        cate_x = reduced_x[offsets[i]:offsets[i + 1]]
        ax.scatter(cate_x[:, 0], cate_x[:, 1], label=cate, color=colors[i])
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.legend(fancybox=True, shadow=True)
    check_path(output_path)
    fig.savefig(output_path, format=format, bbox_inches='tight')

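# Hedged usage sketch (embedding file and vocabulary file are assumptions,
# not from the original source): project concept embeddings to 2D with t-SNE
# and save the scatter plot.
import numpy as np

emb = np.load('concept_embeddings.npy')      # shape (num_concepts, emb_dim)
with open('vocab.txt') as fh:
    vocab = [line.strip() for line in fh]
plot_tsne(emb, vocab, 'plots/tsne.png', format='png', normalize=True)
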
def get_intra_preproc(mat_file, work_dir, n_scans, memory=Memory(None)):
    mat = memory.cache(load_matfile)(mat_file)['SPM']
    preproc = {}
    get_motion_file = False
    if len(n_scans) > 1:
        preproc['motion'] = []
        for session in mat.Sess:
            preproc['motion'].append(session.C.C.tolist())
            if session.C.C.size == 0:
                get_motion_file = True
    else:
        preproc['motion'] = [mat.Sess.C.C.tolist()]
        if mat.Sess.C.C.size == 0:
            get_motion_file = True
    swabold = check_paths(mat.xY.P)
    if len(nb.load(makeup_path(work_dir, swabold[0])).shape) == 4:
        swabold = np.unique(swabold)
    else:
        swabold = np.split(swabold, np.cumsum(n_scans)[:-1])
    if get_motion_file:
        preproc['motion'] = []
    for session in swabold:
        session_dir = find_data_dir(work_dir, check_path(session[0]))
        if get_motion_file:
            motion_file = glob.glob(os.path.join(session_dir, 'rp_*.txt'))[0]
            motion = np.fromfile(motion_file, sep=' ')
            motion = motion.reshape(motion.shape[0] // 6, 6)
            preproc['motion'].append(motion)
        if isinstance(session, (list, np.ndarray)):
            scans = [
                os.path.join(session_dir, os.path.split(scan)[1].strip())
                for scan in session
            ]
            preproc.setdefault('swabold', []).append(scans)
            preproc.setdefault('abold', []).append(
                [strip_prefix_filename(scan, 2) for scan in scans])
            preproc.setdefault('bold', []).append(
                [strip_prefix_filename(scan, 3) for scan in scans])
        else:
            preproc.setdefault('swabold', []).append(session)
            preproc.setdefault('abold', []).append(
                strip_prefix_filename(session, 2))
            preproc.setdefault('bold', []).append(
                strip_prefix_filename(session, 3))
    return preproc

def f_to_csv(featureData, filepath, year):
    """
    Takes featureData as returned by feature_extraction and a featureDict and
    formats rows = country codes, columns = feature values, with a header made
    from the featureData keys
    """
    fkeys = ['country code']
    for f in featureData:
        fkeys.append(f)
    formatted = feature_reformat(featureData)
    formattedAgain = []
    # Turn it from a dictionary into a list
    for c in formatted:
        boo = [c]
        boo.extend(formatted[c])
        formattedAgain.append(boo)
    check_path(filepath)
    with open(filepath + year + '.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(fkeys)
        writer.writerows(formattedAgain)
    return True

def _get_toolchain(tree, root, keep_archive=False, clean=False):
    archive = _create_name(tree, suffix=".tar.bz2")
    tree.insert(0, root)
    dir = path.join(_create_path(tree))
    archive_dir = path.join(dir, archive)
    if not check_path(dir, clean):
        if download(TOOLCHAIN_FORGE + archive, archive_dir):
            extract(archive_dir, dir)
            if not keep_archive:
                remove(archive_dir)
    else:
        print "! %s already exists" % dir

def check_apache_conf(self, apache_service):
    """Validates Apache settings.

    :param apache_service: service type of apache, os dependent.
                           e.g. httpd or apache2
    :type apache_service: string
    """
    print "Checking Apache Config......",
    conf_err_msg = health_check_utils.check_path(
        self.NAME,
        "/etc/%s/conf.d/ods-server.conf" % apache_service)
    if not conf_err_msg == "":
        self._set_status(0, conf_err_msg)

    wsgi_err_msg = health_check_utils.check_path(
        self.NAME,
        '/var/www/compass/compass.wsgi')
    if not wsgi_err_msg == "":
        self._set_status(0, wsgi_err_msg)
    return True

def __init__(self, args, verbose=True):
    self.verbose = verbose
    # define social LSTM model
    print('Building social LSTM model')
    with open(utils.check_path(args.social_conf_path), 'rb') as f:
        self._social_conf = pickle.load(f)
    self._model = SocialModel(self._social_conf, True)
    # define session
    self._sess = tf.InteractiveSession()
    # restore model parameters
    restorer = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(
        os.path.abspath(os.path.expanduser(args.ckpt_dir)))
    print('loading model: {}'.format(ckpt.model_checkpoint_path))
    restorer.restore(self._sess, ckpt.model_checkpoint_path)
    # probability that a new pedestrian pops up while self._cur_num_peds
    # has not reached self._max_num_peds
    self._new_peds_prob = args.new_peds_prob
    # maximum number of pedestrians in a frame
    self._max_num_peds = self._social_conf.maxNumPeds
    # number of pedestrians in the current frame
    self._cur_num_peds = 0
    # a list indicating which of the max_num_peds pedestrians exist
    self._peds_exist = [False] * self._max_num_peds
    # internal data for the social LSTM model
    self._data = np.zeros((1, self._max_num_peds, 3))  # shape=(1,MNP,3)
    self._grid_data = np.zeros(
        (1, self._max_num_peds, self._max_num_peds,
         self._social_conf.grid_size**2))  # shape=(1,MNP,MNP,grid_size**2)
    self._init_data = np.zeros(
        (args.init_num_step, self._max_num_peds, 3))  # shape=(init_num_step,MNP,3)
    self._init_grid_data = np.zeros(
        (args.init_num_step, self._max_num_peds, self._max_num_peds,
         self._social_conf.grid_size**2))  # shape=(init_num_step,MNP,MNP,grid_size**2)
    # shape of background, a 2-element list [width, height]
    self._bg_shape = args.bg_shape
    # number of steps for initialization of a pedestrian
    self._init_num_step = args.init_num_step
    # for interpolation
    self._n_interp = args.n_interp
    self._interp_count = 0
    self._prev_data = np.zeros(self._data.shape)
    self._output_data = np.zeros(self._data.shape)

def start(self):
    check_path(PATH['test_data'], create=True)
    with open(pjoin(PATH['test_data'], 'test.in'), 'w') as fw, \
            open(pjoin(PATH['data'], 'testing_data.txt')) as f, \
            open(pjoin(PATH['data'], 'correct',
                       'MSR_Sentence_Completion_Challenge_V1', 'Data',
                       'Holmes.machine_format.answers.txt')) as fans:
        #regex = re.compile(r'^(.*)\[([^]])\](.*)$')
        regex = re.compile(r'^(\d+[a-e]\)) +(.*)\[([^]]*)\](.*)$')
        while True:
            s = f.readline().strip('\n')
            if not s:
                return
            fin_s = regex.sub(r'\2[]\4', s)
            match = regex.match(s)
            opt = [match.group(3)]
            for i in range(4):
                s = f.readline()
                match = regex.match(s)
                opt.append(match.group(3))
            sans = fans.readline().strip('\n')
            ans = regex.match(sans).group(3)
            fw.write('{}\n{}\n{} {}\n'.format(
                fin_s, ' '.join(opt), ans, opt.index(ans)))

def _c2ast(self, fpath, set_name):
    save_path = os.path.join(self.save_dir, "%s_ast.pkl" % set_name)
    if check_path(save_path):
        return
    from pycparser import c_parser
    from parsetree import TreeTraverse
    cparser = c_parser.CParser()
    tparser = TreeTraverse()
    df = pd.read_pickle(fpath)
    df['ast'] = df['code'].apply(cparser.parse)
    df['pre'] = df['ast'].apply(tparser.preorder)
    df['post'] = df['ast'].apply(tparser.postorder)
    df['level'] = df['ast'].apply(tparser.levelorder)
    df.to_pickle(save_path)

def check_ntp(self):
    """Validates ntp configuration and service"""
    print "Checking NTP......",
    conf_err_msg = health_check_utils.check_path(self.NAME, '/etc/ntp.conf')
    if not conf_err_msg == "":
        self._set_status(0, conf_err_msg)

    serv_err_msg = health_check_utils.check_service_running(self.NAME,
                                                            'ntpd')
    if not serv_err_msg == "":
        self._set_status(0, serv_err_msg)
    return True

def load_intra(mat_file, memory=Memory(None), **kwargs):
    doc = {}
    mat_file = os.path.realpath(mat_file)
    doc.update(parse_path(mat_file, **kwargs))
    work_dir = os.path.split(mat_file)[0]
    mat_file = memory.cache(load_matfile)(mat_file)
    mat = mat_file['SPM']
    doc.update(get_intra_infos(mat_file, memory))
    doc['mask'] = check_path(os.path.join(work_dir, str(mat.VM.fname)))
    doc['onsets'], doc['conditions'] = get_intra_onsets(mat_file, memory)
    doc.update(get_intra_preproc(mat_file, work_dir, doc['n_scans'], memory))
    doc.update(get_intra_images(mat_file, work_dir, memory))
    doc.update(
        get_intra_design(mat_file, doc['n_scans'], doc['contrasts'], memory))
    return doc

def main():
    project_dir = get_project_dir()
    flatc_path = os.path.join(project_dir, ".vendor", "installed",
                              "msvc-2017-x64-windows-release", "bin",
                              "flatc.exe")
    msg_fbs = os.path.join(project_dir, "app", "messages", "messages.fbs")
    udp_hdr_fbs = os.path.join(project_dir, "app", "messages", "udpheader.fbs")
    utils.check_path(flatc_path)
    utils.check_path(msg_fbs)
    utils.check_path(udp_hdr_fbs)

    gen_msgs = f"{flatc_path} --cpp {msg_fbs}"
    gen_udp_hdr = f"{flatc_path} --cpp {udp_hdr_fbs}"

    with utils.pushd(os.path.join(project_dir, "app", "messages")):
        utils.execute(gen_msgs.split())
        utils.execute(gen_udp_hdr.split())

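# Hedged sketch (an assumption, not taken from any of the snippets above):
# check_path is used with several different contracts in this collection --
# a boolean existence test, a variant that creates missing directories
# (create=True), and a validator that returns an error message
# (health_check_utils.check_path). A minimal boolean variant matching the
# first usage might look like this.
import os

def check_path(path):
    """Return True if `path` exists, False otherwise."""
    return os.path.exists(path)
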