def write_eval_result_matrix_to_file(self, file_path):
    """ Writes this object to a json file """
    complete_dict = {}
    json_compatible_eval_data = {}
    for elem_thresh in self.elem_threshs:
        m_thresh_dict = {}
        for m_thresh in self.maj_threshs:
            drop_thresh_dict = {}
            for d_thresh in self.file_level_threshs:
                if self.is_none_entry(elem_thresh, m_thresh, d_thresh):
                    drop_thresh_dict[d_thresh] = "None"
                else:
                    drop_thresh_dict[d_thresh] = self._eval_data[elem_thresh][m_thresh][d_thresh]
            m_thresh_dict[m_thresh] = drop_thresh_dict
        json_compatible_eval_data[elem_thresh] = m_thresh_dict
    complete_dict[self.EVAL_DATA] = json_compatible_eval_data
    complete_dict[self.SOL_MATRIX_SIZE] = self._sol_matrix_size
    complete_dict[self.ELEM_THRESHS] = self.elem_threshs
    # The two constant names below keep the (misspelled) class attribute names.
    complete_dict[self.MAJ_TRHESHS] = self.maj_threshs
    complete_dict[self.FILE_LEVEL_TRHESHS] = self.file_level_threshs
    FileUtil.write_dict_to_json(file_path, complete_dict)
def __init__(self, file_name=None):
    """Creates a DriversDict composed of Driver objects, from a file
    with a list of drivers.

    Requires:
    If given, file_name is a str with the name of a .txt file containing
    a list of drivers organized as in the examples provided in the
    general specification (omitted here for the sake of readability).
    Ensures:
    if file_name is given: a DriversDict composed of objects of class
    Driver that correspond to the drivers listed in the file named
    file_name.
    if file_name is None: an empty DriversDict."""
    UserDict.__init__(self)
    if file_name is not None:
        inFile = FileUtil(file_name)
        for line in inFile.getContent():
            driverData = line.rstrip().split(", ")
            driverName = driverData.pop(DriversDict.INDEXDriverName)
            driverEntryTime, driverAccumTime = driverData
            driverEntryTime = Time(driverEntryTime)
            driverAccumTime = Time(driverAccumTime)
            newDriver = Driver(driverName, driverEntryTime, driverAccumTime)
            self[driverName] = newDriver
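# --- Illustrative usage sketch (not from the original sources) ---
# Assumes an input line format inferred from the split(", ") above:
# driver name first, then entry time and accumulated time, e.g.
# "Ana Silva, 09:00, 02:30". The real layout lives in the omitted
# general specification, and the file name below is hypothetical.
drivers = DriversDict("drivers_0911.txt")
for name, driver in drivers.items():
    print(name, driver)

empty = DriversDict()  # no file_name -> empty DriversDict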
def _precalculate_spacy_lemmatizer(cls, spacy_lemmatizer, dataset_tuple, output_path):
    word_to_lemma_map = {}

    def iterate_files(tokenizer, preprocessor, folder):
        for file in FileUtil.get_files_in_directory(folder, True):
            file_representation = tokenizer.tokenize(file)
            file_representation.preprocess(preprocessor)
            for word in file_representation.token_list:
                lemma = [token.lemma_ for token in spacy_lemmatizer(word)]
                if len(lemma) > 1:
                    log.info(f"More than one lemma {lemma} for \"{word}\". Using \"{''.join(lemma)}\" as lemma")
                lemma = "".join(lemma)
                if word in word_to_lemma_map:
                    # Fixed: the original logged word_to_lemma_dataframe[word],
                    # which is not defined until after all files are processed.
                    if not word_to_lemma_map[word] == lemma:
                        log.info(f"Different duplicate lemma for {word}: {word_to_lemma_map[word]} <-> {lemma}")
                else:
                    word_to_lemma_map[word] = lemma

    for dataset, code_pre, code_tok, req_pre, req_tok in dataset_tuple:
        iterate_files(req_tok, req_pre, dataset.req_folder())
        iterate_files(code_tok, code_pre, dataset.code_folder())

    word_to_lemma_dataframe = pandas.DataFrame.from_dict(
        word_to_lemma_map, orient="index", columns=[cls.COLUMN_LEMMA])
    FileUtil.write_dataframe_to_csv(word_to_lemma_dataframe, output_path)
def copyImage(target_path, microsoft_path, image_files):
    print('\n---------------------- start compare & copy ----------------------')
    if not os.path.isdir(target_path):
        print("%s folder does not exist yet" % target_path)
        FileUtil.mkdir(target_path)
    # Compare: is the file name already present in the target folder?
    # image_files maps Microsoft wallpaper file names to their resolution
    # subfolder.
    num = 0
    for key, value in image_files.items():
        # target file path + resolution subfolder
        now_path = target_path + '\\' + value
        if not os.path.isdir(now_path):
            # create target path + resolution subfolder
            FileUtil.mkdir(now_path)
        # compare files
        files = os.listdir(now_path)
        if key + '.png' in files:
            print(key, 'file already exists')
        else:
            num = num + 1
            print(str(num) + '. ' + key + ' missing - copying file')
            shutil.copyfile(microsoft_path + '\\' + key,
                            now_path + '\\' + key + '.png')
    print('---------------------- end compare & copy ----------------------\n')
def main():
    index = 0
    for dp in FileUtil.get_all_datapoints():
        FileUtil.save_dp_as_image(dp, PROCESSED_WAVE_DIRECTORY, dp.filename)
        index += 1
        if not index % 10:
            # "%" instead of "%%": print() takes a literal string here,
            # not a %-format pattern.
            print(100 * index / 170000, "% done")
def __init__(self):
    '''
    Reads the data after one-hot encoding.
    '''
    self.train_x, self.train_y = FileUtil.readCSV('train', './Dataset/train.csv')
    self.test_x = FileUtil.readCSV('test', './Dataset/test.csv')
def checkPreConditions(nextPeriod, driversFileName, vehiclesFileName,
                       servicesFileName, reservationsFileName):
    """Checks the preconditions.

    Requires: The same as update (omitted here to avoid redundancy).
    Ensures: returns the bool value False if some of the conditions are
    not met and True otherwise.
    """
    headerDrivers = FileUtil(driversFileName).getHeader()
    headerVehicles = FileUtil(vehiclesFileName).getHeader()
    headerServices = FileUtil(servicesFileName).getHeader()
    headerReservations = FileUtil(reservationsFileName).getHeader()
    previousPeriod = Time().getPreviousPeriod(nextPeriod)

    # Changes the format of the period to the one in the header of files
    nextPeriodOther = nextPeriod[0:2] + ":00 - " + nextPeriod[2:4] + ":00"
    previousPeriodOther = previousPeriod[0:2] + ":00 - " + previousPeriod[2:4] + ":00"

    # nextPeriod is a str from the set 0911, 1113, ..., 1921
    if nextPeriod not in ['0911', '1113', '1315', '1517', '1719', '1921']:
        return False
    # The files named driversFileName, vehiclesFileName, servicesFileName and
    # reservationsFileName concern the same company and the same day;
    elif not (headerDrivers[INDEXCompany:INDEXDate + 1] ==
              headerVehicles[INDEXCompany:INDEXDate + 1] ==
              headerServices[INDEXCompany:INDEXDate + 1] ==
              headerReservations[INDEXCompany:INDEXDate + 1]):
        return False
    # The file named reservationsFileName concerns the period indicated by nextPeriod
    elif headerReservations[INDEXPeriod].strip() != nextPeriodOther:
        return False
    # The files named driversFileName, vehiclesFileName, servicesFileName concern
    # the period immediately preceding the one indicated by nextPeriod;
    elif not (headerDrivers[INDEXPeriod].strip() ==
              headerVehicles[INDEXPeriod].strip() ==
              headerServices[INDEXPeriod].strip() == previousPeriodOther):
        return False
    # The file name reservationsFileName ends (before the .txt extension)
    # with the string nextPeriod;
    elif reservationsFileName[-8:-4] != nextPeriod:
        return False
    # The file names driversFileName, vehiclesFileName and servicesFileName
    # end (before their .txt extension) with the string representing
    # the period immediately preceding the one indicated by nextPeriod,
    # from the set 0709, 0911, ..., 1719;
    elif not (driversFileName[-8:-4] == vehiclesFileName[-8:-4] ==
              servicesFileName[-8:-4] == previousPeriod):
        return False
    else:
        return True
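# --- Worked example of the period-string handling above (illustrative) ---
nextPeriod = "0911"
nextPeriodOther = nextPeriod[0:2] + ":00 - " + nextPeriod[2:4] + ":00"
assert nextPeriodOther == "09:00 - 11:00"

# The file-name checks compare the four characters before ".txt":
assert "reservations_0911.txt"[-8:-4] == "0911"  # hypothetical file name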
def precalculate_tracelinks(self, output_precalculated_req_filename,
                            output_precalculated_code_filename,
                            req_embedding_creator, code_embedding_creator):
    # Disabled default-creator fallback (kept from the original, which
    # had it parked in a docstring):
    # if not req_embedding_creator:
    #     req_embedding_creator = self.default_req_emb_creator(self._word_emb_creator)
    # if not code_embedding_creator:
    #     code_embedding_creator = self.default_code_emb_creator(self._word_emb_creator)
    if not output_precalculated_req_filename:
        output_precalculated_req_filename = self.default_precalculated_filename(
            req_embedding_creator.__class__.__name__)
    if not output_precalculated_code_filename:
        output_precalculated_code_filename = self.default_precalculated_filename(
            code_embedding_creator.__class__.__name__)

    req_embeddings = self._create_req_embeddings(req_embedding_creator)
    code_embeddings = self._create_code_embeddings(code_embedding_creator)

    FileUtil.write_dict_to_json(
        output_precalculated_req_filename,
        [req_emb.to_json() for req_emb in req_embeddings])
    FileUtil.write_dict_to_json(
        output_precalculated_code_filename,
        [code_emb.to_json() for code_emb in code_embeddings])

    self.build_precalculated_name_and_load(
        req_embedding_creator.__class__.__name__,
        code_embedding_creator.__class__.__name__)
def _process_eval_results(self, eval_result_matrix: EvalMatrix, output_file_suffix=""):
    log.info("Generating csv...")
    assert len(self._run_config.elem_thresholds) == 1, \
        "Elem threshold needs to be a single threshold value"
    assert len(self._run_config.majority_thresholds) == 1, \
        "Majority threshold needs to be a single threshold value"
    assert len(self._run_config.file_level_thresholds) == 1, \
        "File level threshold needs to be a single threshold value"
    e_thresh = self._run_config.elem_thresholds[0]
    m_thresh = self._run_config.majority_thresholds[0]
    f_thresh = self._run_config.file_level_thresholds[0]
    if not eval_result_matrix.is_none_entry(e_thresh, m_thresh, f_thresh):
        all_links = eval_result_matrix.all_trace_links(e_thresh, m_thresh, f_thresh)
        recall_map_dict = Evaluator.evaluateMAPRecall(
            all_links, self._trace_link_processor._dataset,
            self._trace_link_processor._run_config.reverse_compare)
        output_file_name = csv_recall_map_filename(
            self._trace_link_processor._dataset, output_file_suffix)
        FileUtil.write_recall_precision_csv(recall_map_dict, output_file_name)
        log.info("... Done")
    else:
        log.error(f"No trace links for e{e_thresh} m{m_thresh} f{f_thresh}")
def _process_eval_results(self, eval_result_matrix: EvalMatrix, output_file_suffix=""):
    log.info("Generating csv...")
    for elem_thresh in eval_result_matrix.elem_threshs:
        recall_prec_dict = {}  # use this to override duplicate recall values
        for m_thresh in eval_result_matrix.maj_threshs:
            for f_thresh in eval_result_matrix.file_level_threshs:
                if not eval_result_matrix.is_none_entry(elem_thresh, m_thresh, f_thresh):
                    recall = eval_result_matrix.recall(elem_thresh, m_thresh, f_thresh)
                    prec = eval_result_matrix.precision(elem_thresh, m_thresh, f_thresh)
                    if recall == 0 and prec == 0:
                        continue
                    recall_prec_dict[recall] = prec
        # threshold_name = "_e{}m{}_".format(elem_thresh, self._run_config.majority_print[m_thresh])
        threshold_name = ""
        output_file_name = csv_recall_precision_filename(
            self._trace_link_processor._dataset,
            self._trace_link_processor.output_prefix() + threshold_name + output_file_suffix)
        FileUtil.write_recall_precision_csv(recall_prec_dict, output_file_name)
        # FileUtil.write_dict_to_json(str(Paths.ROOT / output_file_suffix) + ".json", recall_prec_dict)
    log.info("... Done")
def __init__(self, file_name=None):
    """Creates a ReservationsList composed of Reservation objects, from a
    file with a list of reservations.

    Requires:
    If given, file_name is a str with the name of a .txt file containing
    a list of reservations organized as in the examples provided in the
    general specification (omitted here for the sake of readability).
    Ensures:
    if file_name is given: a ReservationsList composed of objects of
    class Reservation that correspond to the reservations listed in the
    file named file_name.
    if file_name is None: an empty ReservationsList."""
    UserList.__init__(self)
    if file_name is not None:
        inFile = FileUtil(file_name)
        for line in inFile.getContent():
            reservData = line.rstrip().split(", ")
            reservClient = reservData[ReservationsList.INDEXClientNameInReservation]
            reservRequestedStartTime = Time(reservData[ReservationsList.INDEXRequestedStartHour])
            reservRequestedEndTime = Time(reservData[ReservationsList.INDEXRequestedEndHour])
            reservCircuit = reservData[ReservationsList.INDEXCircuitInReservation]
            reservCircuitKms = reservData[ReservationsList.INDEXCircuitKmsInReservation]
            newReserv = Reservation(reservClient, reservRequestedStartTime,
                                    reservRequestedEndTime, reservCircuit,
                                    reservCircuitKms)
            self.append(newReserv)
def __init__(self, args):
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]
    self.db_id = args[Constant.JOB_REF_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
        lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
        self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    temp_dir = FileUtil.etl_temp_dir(args, "ORACLE")
    self.input_table_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
    self.input_sample_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])

    self.collect_sample = False
    if Constant.ORA_LOAD_SAMPLE in args:
        self.collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False)

    self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}"
                     .format(JDBC_URL, self.db_id, self.wh_etl_exec_id))
def __init__(self, args):
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]
    self.db_id = args[Constant.DB_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
        lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
        self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    temp_dir = FileUtil.etl_temp_dir(args, "ORACLE")
    self.input_table_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
    self.input_sample_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])

    self.collect_sample = False
    if Constant.ORA_LOAD_SAMPLE in args:
        self.collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False)

    self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}"
                     .format(JDBC_URL, self.db_id, self.wh_etl_exec_id))
def __init__(self, file_name=None):
    """Creates a VehiclesDict composed of Vehicle objects, from a file
    with a list of vehicles.

    Requires:
    If given, file_name is a str with the name of a .txt file containing
    a list of vehicles organized as in the examples provided in the
    general specification (omitted here for the sake of readability).
    Ensures:
    if file_name is given: a VehiclesDict composed of objects of class
    Vehicle that correspond to the vehicles listed in the file named
    file_name.
    if file_name is None: an empty VehiclesDict.
    """
    UserDict.__init__(self)
    # Guard added so file_name=None yields the empty dict the docstring
    # promises (the original unconditionally called FileUtil(file_name)).
    if file_name is not None:
        inFile = FileUtil(file_name)
        for line in inFile.getContent():
            vehicleData = line.rstrip().split(", ")
            vehiclePlate = vehicleData.pop(VehiclesDict.INDEXVehiclePlate)
            vehicleModel, vehicleAutonomy, vehicleKms = vehicleData
            newVehicle = Vehicle(vehiclePlate, vehicleModel, vehicleAutonomy, vehicleKms)
            self[vehiclePlate] = newVehicle
def switch_4g(main_obj):
    print("Configuring MAC: " + str(Constant.MAC))
    mac_cmd = "ifconfig eth0 hw ether " + Constant.MAC + " & ifconfig lo up"
    os.system(mac_cmd)
    time.sleep(2)
    cmd = "ifconfig eth0 " + str(Constant.IP) + " netmask " + str(Constant.IPMASK)
    print(cmd)
    os.system(cmd)
    if main_obj is not None:
        main_obj.start_server_thread()
    if "1" == Constant.G4EN:
        Constant.network = 2
        print("Enabling 4G")
        Constant.wtire_gpio(1, 0)
        time.sleep(3)
        Constant.wtire_gpio(1, 1)
        time.sleep(30)
        os.system('echo "AT\$QCRMCALL=1,1" > /dev/ttyUSB2')
        time.sleep(10)
        os.system("ifconfig wwan0 up")
        time.sleep(2)
        os.system("udhcpc -i wwan0 &")
        print("4G enabled")
        FileUtil.write_dns(Constant.NAMESERVER_PATH, Constant.DNS1, Constant.DNS2)
        # os.system(Constant.NAMESERVER_PATH)
        cmd = "route add default gw " + Constant.GATEWAY
        print(cmd)
        os.system(cmd)
    print("Network configuration complete")
def process_case3(as_file, dir_in, out_file_path):
    project, *rest = as_file.split("_ArchSmells.csv")
    if os.path.isfile(os.path.join(dir_in, project + "_DesignSmells.csv")):
        ds_file = os.path.join(dir_in, project + "_DesignSmells.csv")
        with open(os.path.join(dir_in, as_file)) as asf:
            for line in asf:
                smell, aproject, namespace, cause, *rest = line.split(",")
                if smell == 'God Component':
                    reason = ""
                    reason_class = True
                    for m in re.finditer(r'component are: (\d+)', cause, re.IGNORECASE):
                        reason = m.group(1)
                        reason_class = True
                    if reason == "":
                        for m in re.finditer(r'LOC of the component: (\d+)', cause, re.IGNORECASE):
                            reason = m.group(1)
                            reason_class = False
                    insuff_abs = 0
                    with open(ds_file) as dsf:
                        for ds_line in dsf:
                            dsmell, dnamespace, *drest = ds_line.split(",")
                            if namespace == dnamespace and dsmell == "Insufficient Modularization":
                                insuff_abs += 1
                    if reason_class:
                        FileUtil.writeFile(
                            out_file_path,
                            aproject + "," + namespace + ",1," + str(reason) + ",," + str(insuff_abs))
                    else:
                        FileUtil.writeFile(
                            out_file_path,
                            aproject + "," + namespace + ",1,," + str(reason) + "," + str(insuff_abs))
def process_GC(line, OUT_FILE_PATH, dir):
    smell, project, namespace, cause, *rest = line.split(",")
    if smell == 'God Component':
        for m in re.finditer(r'component are: (\d+)', cause, re.IGNORECASE):
            reason = m.group(1)
            FileUtil.writeFile(
                os.path.join(OUT_FILE_PATH, "smellsInfo_GC.csv"),
                dir + "," + smell + "," + project + "," + namespace + "," + reason)
def __init__(self, file_level_similarity_csv_file, reverse_similarity=False,
             req_file_ext=None, code_file_ext=None):
    """
    All similarities have to be between 0 and 1.
    reverse_similarity=True means smaller values indicate higher similarity.
    """
    self._similarity_dataframe = FileUtil.read_csv_to_dataframe(file_level_similarity_csv_file)
    self._file_path = file_level_similarity_csv_file
    self._reverse_similarity = reverse_similarity

    if req_file_ext is not None:
        modified_reqs = {}
        for req in self.all_req_files():
            modified_reqs[req] = FileUtil.set_extension(req, req_file_ext)
        self._similarity_dataframe.rename(index=modified_reqs, inplace=True)

    if code_file_ext is not None:
        modified_code = {}
        for code in self.all_code_files():
            modified_code[code] = FileUtil.set_extension(code, code_file_ext)
        self._similarity_dataframe.rename(columns=modified_code, inplace=True)
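# --- Hypothetical usage sketch: only __init__ is shown above, so the
# class name FileLevelSimilarity and the csv path are placeholders. ---
sims = FileLevelSimilarity(
    "precalculated/file_level_sims.csv",  # rows: req files, columns: code files
    reverse_similarity=False,             # higher value = more similar
    req_file_ext="txt",                   # rename requirement rows to *.txt
    code_file_ext="java")                 # rename code columns to *.java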
def __init__(self, file_path, vector=None, sub_vectors=None):
    self.file_path = file_path
    self.vector = vector
    # Avoid a mutable default argument: the original sub_vectors=[] would
    # be shared across all instances created without sub_vectors.
    self.sub_vectors = sub_vectors if sub_vectors is not None else []
    self.file_name = FileUtil.get_filename_from_path(self.file_path)
    self.file_name_without_extension = FileUtil.get_filename_without_extension__from_path(self.file_path)
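# --- Why the mutable default above was replaced (plain-Python sketch) ---
def bad(items=[]):       # one list object shared by every call
    items.append(1)
    return items

def good(items=None):    # fresh list per call
    items = items if items is not None else []
    items.append(1)
    return items

assert bad() == [1]
assert bad() == [1, 1]   # the shared default kept growing
assert good() == [1]
assert good() == [1]     # independent lists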
def getEssay():
    print 'about to get essay'
    baseurlLoo = 'http://www.luoo.net/essay/'
    for x in range(84, 100):
        print x
        content = urllib2.urlopen(baseurlLoo + str(x)).read()
        soup = BeautifulSoup(''.join(content))
        if soup.findAll('div', 'error-msg'):
            continue
        else:
            title = soup.find('h1', 'essay-title').text
            essayCont = soup.find('div', 'essay-content').text
            FileUtil.mkDir('./' + title)
            FileUtil.saveFile('./' + title + '/' + title + '.txt', essayCont)
            Essay = soup.find('div', 'essay-content')
            picUrls = Essay.findAll('img')
            for div in picUrls:
                picUrl = dict(div.attrs)['src']
                last = picUrl.rfind('/')
                picName = picUrl[last + 1:]
                urllib.urlretrieve(picUrl, './' + title + '/' + title + picName)
    print 'essay get over'
def _write_dronology_trace_matrix(requirement_dict, file):
    all_trace_links_string = []
    for req_name in requirement_dict:
        if requirement_dict[req_name]:
            all_trace_links_string.append(req_name + ":" + " ".join(
                [class_name for class_name in requirement_dict[req_name]]))
    FileUtil.write_file(file, "\n".join(all_trace_links_string))
def UpgradeDependency(addon_id, currentVersion):
    if os.path.exists(os.path.join(__lib__, addon_id)):
        if os.path.exists(os.path.join(__AddonPath__, addon_id)):
            version = xbmcaddon.Addon(addon_id).getAddonInfo('version')
            if not version == currentVersion:
                FileUtil.TargetFileUpdate(addon_id, __AddonPath__, isFolder=True)
        else:
            FileUtil.TargetFileUpdate(addon_id, __AddonPath__, isFolder=True)
def process_UD(line, OUT_FILE_PATH, dir):
    smell, project, namespace, cause, *rest = line.split(",")
    if smell == 'Unstable Dependency':
        for m in re.finditer(r'less stable component\(s\): ((\w|\.)*)', cause, re.IGNORECASE):
            reason = m.group(1)
            FileUtil.writeFile(
                os.path.join(OUT_FILE_PATH, "smellsInfo_UD.csv"),
                dir + "," + smell + "," + project + "," + namespace + "," + reason)
def _write_code_entries(self, chosen_entries, remaining_entries):
    FileUtil.write_rows_to_csv_file(
        code_csv_filename(self._split_percent, self._dataset, self._tracelink_type),
        chosen_entries)
    FileUtil.write_rows_to_csv_file(
        code_csv_filename(Util.complement(self._split_percent), self._dataset, self._tracelink_type),
        remaining_entries)
def constructFromFile(self):
    self.constructNewBoard()
    try:
        self.file = FileUtil.openForRead(
            os.path.join(FileUtil.getProgramDirectory(), "maps", self.mapName + ".battlefield"))
    except IOError:
        # NOTE: execution continues even if the map failed to load;
        # the lines below will then operate on a stale or missing self.file.
        print "Error loading map"
    ships = Formatter.stripShips(Formatter.convertMatrix(self.file))
    self.board.addShips(ships)
def do():
    # Generate the per-file modified-line-count report ----- Local
    # Column headers (kept in Chinese because they are written to the
    # Excel output): file path, total lines added, total lines deleted,
    # total lines modified, number of changes, total lines.
    fields = ['文件路径', '总增加行数', '总删除行数', '总修改行数', '修改次数', '总行数']
    for project in conf.projects:
        for type in conf.type:
            FileUtil.import_local_rows_excel(
                fields, local_modify_rows.getResult(project, type), project, type)
def precalculate_tracelinks(self, output_precalculated_req_filename,
                            output_precalculated_code_filename,
                            req_embedding_creator=None, code_embedding_creator=None,
                            output_suffix=""):
    if not req_embedding_creator:
        req_embedding_creator = self.default_req_emb_creator(self._word_emb_creator)
    if not code_embedding_creator:
        code_embedding_creator = self.default_code_emb_creator(self._word_emb_creator)
    if not output_precalculated_req_filename:
        output_precalculated_req_filename = self.default_precalculated_filename(
            req_embedding_creator.__class__.__name__, output_suffix)
    if not output_precalculated_code_filename:
        output_precalculated_code_filename = self.default_precalculated_filename(
            code_embedding_creator.__class__.__name__, output_suffix)

    req_embeddings = self._create_req_embeddings(req_embedding_creator)
    code_embeddings = self._create_code_embeddings(code_embedding_creator)

    for cg_emb in code_embeddings:
        assert isinstance(cg_emb, MethodCallGraphEmbeddingMultipleSims)
        for method_name_key in cg_emb.methods_dict:
            for req_file in req_embeddings:
                assert isinstance(req_file, RequirementEmbedding)
                # choose if using partial vectors or whole vector
                req_parts = self._choose_req_part(req_file)
                sims_of_all_parts = [
                    Util.calculate_cos_sim(req_vector, cg_emb.get_method_vector(method_name_key))
                    for req_vector in req_parts]
                cg_emb.add_method_sim(method_name_key, sims_of_all_parts, req_file.file_name)
        for other_key in cg_emb.non_cg_dict:
            for req_file in req_embeddings:
                assert isinstance(req_file, RequirementEmbedding)
                # choose if using partial vectors or whole vector
                req_parts = self._choose_req_part(req_file)
                sims_of_all_parts = [
                    Util.calculate_cos_sim(req_vector, cg_emb.get_non_cg_vector(other_key))
                    for req_vector in req_parts]
                cg_emb.add_non_cg_sim(other_key, sims_of_all_parts, req_file.file_name)

    FileUtil.write_dict_to_json(
        output_precalculated_req_filename,
        [req_emb.to_json() for req_emb in req_embeddings])
    FileUtil.write_dict_to_json(
        output_precalculated_code_filename,
        [code_emb.to_json() for code_emb in code_embeddings])
def __init__(self, ital=False):
    if ital:
        stopwords_as_string = FileUtil.read_textfile_into_string(ITAL_CODE_STOPWORD_FILEPATH)
    else:
        stopwords_as_string = FileUtil.read_textfile_into_string(CODE_STOPWORD_FILEPATH)
    self._stop_words = stopwords_as_string.split("\n")
def btnsave():
    # json.loads() no longer accepts an encoding argument in Python 3;
    # decoding the raw request body is enough.
    form = json.loads(str(request.data, encoding="utf-8"))
    print(form)
    try:
        FileUtil.write_json_data(Constant.CONF_FILE_PATH, form)
        return jsonify({'s': 0, "randomNum": randomNum})
    except Exception as e:
        print(str(e))
        return jsonify({'s': 1, "randomNum": randomNum})
def main(resources_file_path, base_url, scratch_func):
    old_data = FileUtil.read(resources_file_path)
    new_data = scratch_func(base_url, old_data)
    if new_data:
        date_new_data = "//" + datetime.now().strftime('%Y-%m-%d') + "\n" + "\n".join(new_data) + "\n"
        FileUtil.append(resources_file_path, date_new_data)
        MongoUtil.insert(resources_file_path, date_new_data)
    else:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '----',
              getattr(scratch_func, '__name__'), ": nothing to update")
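# --- Illustrative scratch_func (every name below is made up) ---
# Contract implied by main(): takes (base_url, old_data) and returns a
# list of new lines, or something falsy when there is nothing to add.
def scratch_example(base_url, old_data):
    candidates = ["https://example.com/a", "https://example.com/b"]
    return [u for u in candidates if u not in old_data]

# main("resources/links.txt", "https://example.com", scratch_example)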
def loadImage(self, path):
    # NOTE: `path` is unused; samples are always loaded from the
    # "number" sample directory below.
    imgs = []
    cvHelp = CvHelp()
    fileUtil = FileUtil()
    (files, counts, dirs) = fileUtil.getSamplesLabelsDirnames("number")
    for file in files:
        img = cv2.resize(cvHelp.openGray(file), (IMAGE_SIZE, IMAGE_SIZE))
        imgs.append(img)
    return imgs, counts, dirs
def process_DS(line, OUT_FILE_PATH, dir):
    smell, project, namespace, cause, *rest = line.split(",")
    if smell == 'Dense Structure':
        # Escaped the decimal point: the original r'(\d+.\d+)' also matched
        # any character between the digit groups.
        for m in re.finditer(r'Average degree = (\d+\.\d+)', cause, re.IGNORECASE):
            reason = m.group(1)
            FileUtil.writeFile(
                os.path.join(OUT_FILE_PATH, "smellsInfo_DS.csv"),
                dir + "," + smell + "," + reason)
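# --- Self-contained check of the cause-parsing pattern shared by
#     process_GC / process_UD / process_DS (the cause text is made up) ---
import re

cause = "Components are densely connected. Average degree = 7.25"
for m in re.finditer(r'Average degree = (\d+\.\d+)', cause, re.IGNORECASE):
    print(m.group(1))  # -> 7.25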
def getList(self):
    '''
    wordList depends on rawStr, but it is slow to compute and used often,
    so it is cached as an attribute.
    '''
    wordList = FileUtil.cutWords(self.rawStr)   # denoise and segment into words
    wordList = FileUtil.rmStopwords(wordList)   # remove stopwords
    self.wordList = wordList
    return self.wordList
def __init__(self, dataset, direction: RelationDirection, inheritance_graph_path=None):
    if not inheritance_graph_path:
        inheritance_graph_path = Paths.inheritance_graph_filename(dataset)
    self._implements_inheritance_dict = FileUtil.read_dict_from_json(inheritance_graph_path)
    self._direction = direction
    self._class2file_map = FileUtil.read_dict_from_json(
        Paths.classifier_to_file_map_filename(dataset))
def __init__(self, args):
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    # connection
    self.username = args[Constant.HIVE_METASTORE_USERNAME]
    self.password = args[Constant.HIVE_METASTORE_PASSWORD]
    self.jdbc_driver = args[Constant.HIVE_METASTORE_JDBC_DRIVER]
    self.jdbc_url = args[Constant.HIVE_METASTORE_JDBC_URL]

    self.connection_interval = int(args[Constant.HIVE_METASTORE_RECONNECT_TIME])
    self.logger.info("DB re-connection interval: %d" % self.connection_interval)

    self.db_whitelist = args[Constant.HIVE_DATABASE_WHITELIST_KEY] if Constant.HIVE_DATABASE_WHITELIST_KEY in args else "''"
    self.db_blacklist = args[Constant.HIVE_DATABASE_BLACKLIST_KEY] if Constant.HIVE_DATABASE_BLACKLIST_KEY in args else "''"
    self.logger.info("DB whitelist: " + self.db_whitelist)
    self.logger.info("DB blacklist: " + self.db_blacklist)

    self.conn_hms = None
    self.connect_time = None
    self.db_connect(True)

    hdfs_namenode_ipc_uri = args.get(Constant.HDFS_NAMENODE_IPC_URI_KEY, None)
    kerberos_principal = args.get(Constant.KERBEROS_PRINCIPAL_KEY, None)
    keytab_file = args.get(Constant.KERBEROS_KEYTAB_FILE_KEY, None)

    kerberos_auth = False
    if Constant.KERBEROS_AUTH_KEY in args:
        kerberos_auth = FileUtil.parse_bool(args[Constant.KERBEROS_AUTH_KEY], False)

    self.table_whitelist_enabled = False
    if Constant.HIVE_TABLE_WHITELIST_ENABLED in args:
        self.table_whitelist_enabled = FileUtil.parse_bool(args[Constant.HIVE_TABLE_WHITELIST_ENABLED], False)

    self.table_blacklist_enabled = False
    if Constant.HIVE_TABLE_BLACKLIST_ENABLED in args:
        self.table_blacklist_enabled = FileUtil.parse_bool(args[Constant.HIVE_TABLE_BLACKLIST_ENABLED], False)

    self.schema_url_helper = SchemaUrlHelper.SchemaUrlHelper(
        hdfs_namenode_ipc_uri, kerberos_auth, kerberos_principal, keytab_file)

    # global variables
    self.databases = None
    self.db_dict = {}        # name : index
    self.table_dict = {}     # fullname : index
    self.dataset_dict = {}   # name : index
    self.instance_dict = {}  # name : index
    self.serde_param_columns = []

    # counting statistics
    self.external_url = 0
    self.hdfs_count = 0
    self.schema_registry_count = 0
def parseCreatingLine(testExecutableDirectoryName, line):
    search = re.search("(.*):creating '(.*)'", line, re.IGNORECASE)
    result = None
    if search:
        fileName = os.path.abspath(testExecutableDirectoryName + "/" + search.group(1))
        gcovFileName = os.path.abspath(testExecutableDirectoryName + "/" + search.group(2))
        result = {
            "fileName": fileName,
            "gcovFileName": gcovFileName,
            "gcovLines": FileUtil.readFileLines(gcovFileName)
        }
    return result
def readVideo():
    d = {}
    # The original search-path list had a misplaced quote that fused two
    # directories into one string ("./readDB/,/home/data/recomData_tmp").
    for line in FileUtil.openUncertainDirFile(
            ["../readDB/", "./", "./readDB/", "/home/data/recomData_tmp"],
            "recipe.video.txt"):
        cols = line.strip().split()
        if len(cols) < 2:
            continue
        if cols[0].endswith("L"):
            cols[0] = cols[0][0:-1]  # drop a trailing "L" (long-int suffix)
        d[cols[0]] = cols[1]         # store under both str and int keys
        d[int(cols[0])] = cols[1]
    return d
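# --- Illustration of the line parsing above with a made-up record ---
line = "12345L\tv_98765\n"   # hypothetical recipe.video.txt entry
cols = line.strip().split()
if cols[0].endswith("L"):
    cols[0] = cols[0][0:-1]  # strip the long-int suffix
assert cols[0] == "12345"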
def __init__(self, fileName, exemplarCommand, exemplarConfig):
    self.fileName = fileName
    self.nonCompliantLines = 0
    self.score = 0
    self.exemplarCommand = exemplarCommand
    self.exemplarConfig = exemplarConfig
    try:
        self.fileData = FileUtil.readFileString(self.fileName)
        self.totalLines = len(self.fileData.splitlines())
    except IOError, e:
        print >> sys.stderr, e
        sys.exit(1)
def __init__(self, args):
    self.prefix = args.prefix
    self.name = args.name
    self.version = validateArgument(args.version)
    self.miniNotice = ""
    self.shortNotice = ""
    self.longNotice = ""
    self.about = None
    self.what = None
    self.args = args
    self.miniNotice = FileUtil.readFileString(args.miniNotice)
    self.shortNotice = FileUtil.readFileString(args.shortNotice)
    self.longNotice = FileUtil.readFileString(args.longNotice)
    self.buildDate = datetime.datetime.utcnow().isoformat()

    if self.version is None:
        self.version = " RELEASE_VERSION "

    if self.about is None:
        self.about = createQuotedCString("%s " % (self.name)) + \
                     self.version + \
                     createQuotedCString(" %s" % (self.buildDate)) + " " + \
                     createQuotedCString("\n%s" % (self.miniNotice))

    if self.what is None:
        if self.miniNotice is not None:
            notice = "\n".join(map(lambda line: "\t" + line,
                                   self.miniNotice.split("\n")[:-1]))
        else:
            notice = ""
        self.what = createQuotedCString(whatLineToken) + " " + \
                    createQuotedCString(self.name + " ") + " " + \
                    self.version + " " + \
                    createQuotedCString(" " + self.buildDate) + "\n" + \
                    createQuotedCString(whatLineToken) + " " + \
                    createQuotedCString(notice)
    return
def __init__(self, file_name=None):
    """Creates a ServicesList composed of Service objects, from a file
    with a list of services.

    Requires:
    If given, file_name is a str with the name of a .txt file containing
    a list of services organized as in the examples provided in the
    general specification (omitted here for the sake of readability).
    Ensures:
    if file_name is given: a ServicesList composed of objects of class
    Service that correspond to the services listed in the file named
    file_name. In this ServicesList, drivers terminating their services
    earlier have priority over the ones terminating later; lexicographic
    order of drivers' names decides eventual ties in each case above.
    if file_name is None: an empty ServicesList.
    """
    # creates empty ServicesList
    UserList.__init__(self)
    # if file_name is given, self is populated with Services corresponding
    # to the services in the file named file_name
    if file_name is not None:
        inFile = FileUtil(file_name)
        for line in inFile.getContent():
            servData = line.rstrip().split(", ")
            servDriver = servData[ServicesList.INDEXDriverName]
            servPlate = servData[ServicesList.INDEXVehiclePlate]
            servClient = servData[ServicesList.INDEXClientName]
            servDeparTime = Time(servData[ServicesList.INDEXDepartureHour])
            servArrivalTime = Time(servData[ServicesList.INDEXArrivalHour])
            servCircuit = servData[ServicesList.INDEXCircuitId]
            servCircuitKms = servData[ServicesList.INDEXCircuitKms]
            servDriverStatus = servData[ServicesList.INDEXDriverStatus]
            newService = Service(servDriver, servPlate, servClient, servDeparTime,
                                 servArrivalTime, servCircuit, servCircuitKms,
                                 servDriverStatus)
            self.append(newService)
def __init__(self, wh_etl_exec_id='0'):
    self.logger = LoggerFactory.getLogger("%s[%s]" % (self.__class__.__name__, wh_etl_exec_id))

    # set up connection; `args` is the module-level job property dict
    # (sys.argv[1] in these Jython ETL scripts)
    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
        lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
        self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    temp_dir = FileUtil.etl_temp_dir(args, "HIVE")
    self.input_schema_file = os.path.join(temp_dir, args[Constant.HIVE_SCHEMA_CSV_FILE_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.HIVE_FIELD_METADATA_KEY])
    self.input_instance_file = os.path.join(temp_dir, args[Constant.HIVE_INSTANCE_CSV_FILE_KEY])
    self.input_dependency_file = os.path.join(temp_dir, args[Constant.HIVE_DEPENDENCY_CSV_FILE_KEY])

    self.db_id = args[Constant.JOB_REF_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
if __name__ == "__main__": args = sys.argv[1] # connection username = args[Constant.ORA_DB_USERNAME_KEY] password = args[Constant.ORA_DB_PASSWORD_KEY] JDBC_DRIVER = args[Constant.ORA_DB_DRIVER_KEY] JDBC_URL = args[Constant.ORA_DB_URL_KEY] e = OracleExtract() e.conn_db = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER) exclude_databases = filter(bool, args[Constant.ORA_EXCLUDE_DATABASES_KEY].split(',')) collect_sample = False if Constant.ORA_LOAD_SAMPLE in args: collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False) temp_dir = FileUtil.etl_temp_dir(args, "ORACLE") table_output_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY]) field_output_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY]) sample_output_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY]) try: e.conn_db.cursor().execute("ALTER SESSION SET TIME_ZONE = 'US/Pacific'") e.conn_db.cursor().execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'") e.conn_db.cursor().execute("CALL dbms_application_info.set_module('%s','%d')" % ('WhereHows (Jython)', os.getpid())) e.conn_db.commit() e.run(exclude_databases, None,
        curs.execute(sql)
        # self.debug(sql)
        rows = curs.fetchall()
        curs.close()
        return rows

    def db_connect(self, init=False):
        if init or (datetime.now() - self.connect_time).total_seconds() > self.connection_interval:
            if self.conn_hms:
                self.conn_hms.close()
            self.conn_hms = zxJDBC.connect(self.jdbc_url, self.username,
                                           self.password, self.jdbc_driver)
            self.logger.info("Connected to Hive metadata-store DB")
            self.connect_time = datetime.now()


if __name__ == "__main__":
    args = sys.argv[1]
    e = HiveExtract(args)

    temp_dir = FileUtil.etl_temp_dir(args, "HIVE")
    schema_json_file = os.path.join(temp_dir, args[Constant.HIVE_SCHEMA_JSON_FILE_KEY])
    hdfs_map_csv_file = os.path.join(temp_dir, args[Constant.HIVE_HDFS_MAP_CSV_FILE_KEY])

    try:
        e.databases = e.get_all_databases(e.db_whitelist, e.db_blacklist)
        e.run(schema_json_file, None, hdfs_map_csv_file)
    finally:
        if e.conn_hms:
            e.conn_hms.close()
        self.conn_cursor.execute(cmd)
        self.conn_mysql.commit()


if __name__ == "__main__":
    args = sys.argv[1]
    l = TeradataLoad()

    # set up connection
    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]

    temp_dir = FileUtil.etl_temp_dir(args, "TERADATA")
    l.input_file = os.path.join(temp_dir, args[Constant.TD_METADATA_KEY])
    l.input_field_file = os.path.join(temp_dir, args[Constant.TD_FIELD_METADATA_KEY])
    l.input_sampledata_file = os.path.join(temp_dir, args[Constant.TD_SAMPLE_OUTPUT_KEY])

    do_sample = False
    if Constant.TD_LOAD_SAMPLE in args:
        do_sample = FileUtil.parse_bool(args[Constant.TD_LOAD_SAMPLE], False)
        if datetime.datetime.now().strftime('%a') not in args[Constant.TD_COLLECT_SAMPLE_DATA_DAYS]:
            do_sample = False

    l.db_id = args[Constant.JOB_REF_ID_KEY]
    l.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
    l.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    l.conn_cursor = l.conn_mysql.cursor()
import ExcelUtil
import FileUtil

# renamed from `dict` to avoid shadowing the built-in
rows = ExcelUtil.get(r"C:\Users\zhaoyiwei\Desktop\超时还款记录\超时还款记录0604.xlsx", 1, 1)
result = []
target = '{{"repayOuterNo":"{0}","loanOuterNo":"{1}","repayTime":"{2}","status":{3},"outterMessage":"","repaySource":{4},"repayPlanListString":"[{{\"repayPlanId\":{5},\"principal\":{6},\"breach\":0,\"charge\":0,\"interest\":{7},\"earlyRepayBreach\":0,\"penalty\":0,\"installmentNo\":{8}}}]" }}'
# target1 = '{{"repayOuterNo":"{0}"}}'
for key in rows:
    line = rows[key]
    print(line)
    temp = target.format(line[0].strip(), line[1].strip(), line[3].strip(),
                         round(line[4]), round(line[5]), line[6], line[7],
                         line[10], line[13])
    # print(line[0])
    # temp = target.format(line[0].strip())
    print(temp)
    result.append(temp)

FileUtil.write(r"C:\Users\zhaoyiwei\Desktop\123\666.txt", "a+", result)
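# --- How the doubled braces in `target` work (illustrative values) ---
# str.format() emits a literal { or } for each {{ or }}:
template = '{{"repayOuterNo":"{0}","status":{1}}}'
print(template.format("R-001", 2))  # -> {"repayOuterNo":"R-001","status":2}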
import sys
sys.path.append("./")
sys.path.append("./util")

import FileUtil

TMP_DATA_DIR = "/home/data/recomeData_tmp/"

from column import *
from boundedQueue import *

MAX_COOCUUR_NUM = 15

rs = {}
for line in FileUtil.openUncertainDirFile(["./", TMP_DATA_DIR], "quality.rid"):
    rs[line.strip()] = 1


def coOccur():
    users = {}
    userTags = {}
    for line in sys.stdin:
        cols = line.split("\t")
        if len(cols) < APP_LOG_COLUMNS:
            # print line.strip()
            # print len(cols)
            continue
        userid = uuid(cols)  # cols[USER_CID]
        # if userid == '0':
        #     userid = cols[IP_CID]
        rid = getRid(cols)
        # sys.stdout.write(line + "\n")