def files_rank_correlation(fileA, fileB):
    # read in the file data lines
    A_lines = utils.file_read(fileA)
    B_lines = utils.file_read(fileB)
    assert len(A_lines) == len(B_lines)
    correlations = []
    # for each line calculate the correlation between the two files
    for i in range(len(A_lines)):
        # assign an order rank to the values in the line
        A_dict_rank = {val: j for j, val in enumerate(A_lines[i].split())}
        B_dict_rank = {val: j for j, val in enumerate(B_lines[i].split())}
        A_rank = []
        B_rank = []
        # for each value in line A get the rank order in list A and B
        keys = list(A_dict_rank.keys())
        max_rank = len(keys)
        for key in keys:
            A_rank.append(A_dict_rank[key])
            B_rank.append(B_dict_rank.get(key, max_rank))
        # calculate the spearman's correlation coefficient between the two rankings
        corr, _ = stats.spearmanr(A_rank, B_rank)
        # print(A_rank)
        # print(B_rank)
        correlations.append(corr)
    return correlations
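# Every snippet in this collection calls into a small "utils" helper module that is
# not shown here, and the behaviour of file_read() varies between the codebases
# (a raw string in some, a list of lines or parsed JSON in others). The following
# is a minimal sketch of the two file helpers under the simplest assumption that
# fits the function above -- file_read() returning a list of lines and
# file_write() replacing the file contents. It is an illustration, not the
# original implementation.
def file_read(path):
    """Return the contents of a text file as a list of lines (assumed behaviour)."""
    with open(path) as handle:
        return handle.read().splitlines()


def file_write(path, data):
    """Replace the contents of a text file with the given string (assumed behaviour)."""
    with open(path, 'w') as handle:
        handle.write(data)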
def generate(self, path):
    """generate an overlay permissions file"""
    perm_file = path + '.permissions'
    overlay_contents = []
    for root, dirs, files in os.walk(path):
        for sub_dir in dirs:
            overlay_contents.append(os.path.join(root, sub_dir))
        for file_name in files:
            if not file_name == '.gitignore':
                overlay_contents.append(os.path.join(root, file_name))
    perm_list = {}
    for entry in overlay_contents:
        stat = os.stat(entry)
        mode = int(oct(stat.st_mode)[3:])
        entry = entry.split(path, 1)[1]
        if entry == '/root':
            perm_list[entry] = ('0700', 0, 0)
        elif entry.endswith('.ssh'):
            perm_list[entry] = ('0700', 0, 0)
        elif entry.endswith('authorized_keys'):
            perm_list[entry] = ('0700', 0, 0)
        elif entry.startswith('/usr/local/bin'):
            perm_list[entry] = ('0755', 0, 0)
        elif entry == '/etc/rc.local':
            perm_list[entry] = ('0755', 0, 0)
        else:
            perm_list[entry] = ('%04d' % mode, 0, 0)
    if os.path.isfile(perm_file):
        data = utils.file_read(perm_file).split('\n')
        defined_list = {}
        for line in data:
            try:
                mode, uid, gid, real_path = line.split('\t')
            except ValueError:
                pass
            else:
                defined_list[real_path] = (mode, uid, gid)
        for real_path in perm_list:
            if real_path in defined_list:
                perm_list[real_path] = defined_list[real_path]
    data = []
    header_file = os.path.join(self.cfg['paths']['templates'],
                               self.cfg['templates']['permission_script'])
    header = utils.file_read(header_file)
    data.append(header.strip())
    for key, value in perm_list.items():
        data.append('%s\t%s\t%s\t%s' % (value[0], value[1], value[2], key))
    utils.file_write(perm_file, '\n'.join(data) + '\n')
def recorrerCarpeta(self, archivo):
    # print(archivo)
    if os.path.isdir(archivo):
        for ruta in os.listdir(archivo):
            self.recorrerCarpeta(archivo + "/" + ruta)
    elif os.path.basename(archivo) == self.nombreArchivo and \
            os.path.basename(os.path.dirname(archivo)) == self.nombreCarpeta:
        utils.file_write(archivo, utils.file_read(self.filepath))
def pxe_variables(cfg, address):
    """try to read the pxe variables from the pxelinux.cfg file"""
    file_name = os.path.join(cfg['paths']['tftpboot'], 'pxelinux.cfg', address)
    if not os.path.isfile(file_name):
        raise Exception('"%s" does not exist' % file_name)
    data = utils.file_read(file_name)
    data = re.search(
        r'# \*\*\* start - seedBank pxe variables \*\*\*$(.*?)\*\*\*',
        data, re.M | re.S)
    data = data.group(1).strip()
    data = re.sub('(?m)^#', '', data)
    data = [line.strip() for line in data.split('\n') if line]
    data = [line.split(' =', 1) for line in data]
    data = [(line[0].strip(), line[1].strip()) for line in data]
    lines = []
    values = ['None', 'False', 'True']
    for line in data:
        if line[1].startswith('[') or line[1] in values:
            lines.append((line[0], ast.literal_eval(line[1])))
        else:
            lines.append(line)
    result = dict(lines)
    return result
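# A small, self-contained illustration of the comment block that pxe_variables()
# above extracts from a pxelinux.cfg file. Only the "*** start/end - seedBank pxe
# variables ***" markers and the "# name = value" layout follow from the regular
# expressions in the function; the variable names and values here are made up.
SAMPLE_PXELINUX_CFG = """\
# *** start - seedBank pxe variables ***
# host_name = demo01
# overlay = None
# seeds = ['default']
# *** end - seedBank pxe variables ***
default linux
"""
# Parsed with the logic above, this would yield
# {'host_name': 'demo01', 'overlay': None, 'seeds': ['default']}.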
def puppet(manifest):
    """return the generated seedbank bootstrap init file"""
    file_name = os.path.join(cfg['paths']['templates'],
                             cfg['templates']['puppet_manifest'])
    result = utils.file_read(file_name)
    result = utils.apply_template(result, {'manifest': manifest}, file_name)
    return result
def fuzz_check(input_file, n_tests, to_randomize):
    """Check user inputs and process them as required by fuzz"""
    input_seed = utils.file_read(input_file)
    l = len(input_seed)
    print('\nInput size is', l, 'bytes')
    try:
        n_tests = int(n_tests)
        if n_tests < 1:
            usage('N_TESTS should be > 0')
    except ValueError:
        usage('"' + str(n_tests) + '" is not a valid integer')
    print('Running test', n_tests, 'times')
    try:
        n_bytes = round(l * float(to_randomize))
        if n_bytes < 0 or n_bytes > l:
            usage('PART_TO_RANDOMIZE should be between 0 and 1')
    except ValueError:
        usage('"' + to_randomize + '" is not a valid float')
    print('Randomizing', n_bytes, 'bytes\n')
    return input_seed, n_tests, n_bytes
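# A hypothetical driver for fuzz_check() above, showing how the returned
# (input_seed, n_tests, n_bytes) triple might be consumed: each iteration copies
# the seed, overwrites n_bytes randomly chosen positions, and hands the mutated
# input to a target program. The target command and temp-file handling are
# assumptions for illustration only, not part of the original fuzzer.
import os
import random
import subprocess
import tempfile


def fuzz_run(input_file, n_tests, to_randomize, target_cmd):
    # validate the arguments and derive how many bytes to mutate
    input_seed, n_tests, n_bytes = fuzz_check(input_file, n_tests, to_randomize)
    seed = bytearray(input_seed, 'latin-1') if isinstance(input_seed, str) \
        else bytearray(input_seed)
    for test in range(n_tests):
        mutated = bytearray(seed)
        # overwrite n_bytes distinct positions with random byte values
        for pos in random.sample(range(len(mutated)), n_bytes):
            mutated[pos] = random.randrange(256)
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(mutated)
        # feed the mutated input to the program under test
        subprocess.run([target_cmd, tmp.name])
        os.unlink(tmp.name)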
def script(self, dst, overlay, prefix):
    """generate the permissions script which will be applied before the end
    of an installation"""
    path = os.path.join(self.cfg['paths']['overlays'], overlay)
    perm_file = path + '.permissions'
    perm_script = os.path.join(dst, 'fix_perms.sh')
    if os.path.isfile(perm_file):
        data = utils.file_read(perm_file).strip()
        lines = data.split('\n')
        lines = [line for line in lines if not line.startswith('#')]
        script = []
        for line in lines:
            try:
                mode, uid, gid, real_path = line.split('\t')
            except ValueError:
                err = '%s is corrupt, delete or regenerate it with '\
                    'the "seedbank manage --overlay" command, or fix the '\
                    'file manually, line "%s" contains errors' % \
                    (perm_file, line)
                raise utils.FatalException(err)
            else:
                if prefix:
                    real_path = os.path.join(prefix, real_path[1:])
                if real_path.endswith('.sb_template'):
                    real_path = os.path.splitext(real_path)[0]
                script.append('chown %s:%s %s' % (uid, gid, real_path))
                script.append('chmod %s %s' % (mode, real_path))
        utils.file_write(perm_script, '\n'.join(script))
    else:
        logging.warning('overlay "%s" has been selected but permission '
                        'file "%s" does not exist, so all files will be owned by root '
                        'and will keep the current permissions which could lead to '
                        'problems', overlay, perm_file)
        utils.file_write(perm_script, '')
def aplicarSonar(ruta):
    text = utils.file_read(ruta)
    text = quitCurlyAlones(text)
    utils.file_write(ruta, text)
def run(self, edit):
    paquete_snippets = sublime.packages_path() + os.sep + "snippets"
    lista = []
    comandos = []
    for archivo in utils.get_files({"folder": paquete_snippets, "ext": "json"}):
        snip = utils.load_json(archivo)
        lista = lista + list(snip.keys())
    lista = list(set(lista))
    for snippet in lista:
        snippet = snippet.lower().replace("-", "_").replace("(", "").replace(")", "") \
            .replace(" ", "").replace("?", "").replace(":", "")
        utils.file_write(RUTA_COMANDOS + "code_" + snippet + ".bat",
                         "echo code_" + snippet + " > d:/sublime3/comando.txt")
        comandos.append("code_" + snippet)
    archivos_plantillas = utils.get_files({"folder": RUTA_PLANTILLAS})
    for plantilla in archivos_plantillas:
        plantilla = os.path.basename(plantilla)
        if plantilla.rfind(".") != -1:
            plantilla = plantilla[:plantilla.rfind(".")]
        plantilla = plantilla.replace(" ", "_").lower()
        utils.file_write(RUTA_COMANDOS + "make_" + plantilla + ".bat",
                         "echo make_" + plantilla + " > d:/sublime3/comando.txt")
        comandos.append("make_" + plantilla)
    archivos_python = utils.get_files({"folder": sublime.packages_path(), "ext": ".py"})
    for programa in archivos_python:
        rutaPrograma = programa
        try:
            programa = utils.file_read(programa)
        except:
            print("saco error al leer : " + rutaPrograma)
            continue
        comandosPython = re.findall(r"class ([\w]+)\(sublime_plugin.TextCommand\)",
                                    programa, re.IGNORECASE)
        for comandoPython in comandosPython:
            comandoPython = comandoPython[0].lower() + comandoPython[1:]
            cp = ""
            for c in comandoPython:
                if c.isupper():
                    cp += "_"
                cp += c.lower()
            if cp.endswith("_command"):
                cp = cp.replace("_command", "")
            comandos.append(cp)
    comandosInternos = utils.file_read("D:/sublime3/Data/Packages/User/Default (Windows).sublime-keymap")
    comandosInternos = re.findall(r'"command": *"(\w+)" *\}', comandosInternos, re.IGNORECASE)
    for comandoInterno in comandosInternos:
        comandos.append(comandoInterno)
    comandos = sorted(list(set(comandos)))
    strComandos = ""
    for comando in comandos:
        strComandos += comando + "\n"
    window = sublime.active_window()
    view = window.active_view()
    utils.file_write("d:/sublime3/comandos.txt", strComandos)
    view.run_command("ejecutar_comando",
                     {"comando": "taskkill /f /im CustomizeableJarvis.exe\njarvis\nexit"})
def rclocal(address):
    """return the rc.local file"""
    pxe_vars = settings.pxe_variables(cfg, address)
    file_name = os.path.join(cfg['paths']['templates'],
                             cfg['templates']['rc_local'])
    result = utils.file_read(file_name)
    result = utils.apply_template(result, pxe_vars, file_name)
    return result
def original_count_comment(directory_seq, date, basedir, seconddir='comments'):
    targetpath = target_path(directory_seq, date, basedir, seconddir)
    filenames = glob.glob('%s/*.json' % targetpath)
    cnt_comments = 0
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        cnt_comments = cnt_comments + len(items['comments'])
    return cnt_comments
def count_blog_by_directory(directory_seq, date, basedir, seconddir='texts'):
    # basedir: /var/data/naver-blog/
    targetpath = target_path(directory_seq, date, basedir, seconddir)
    filenames = glob.glob('%s/*.json' % targetpath)
    cnt_image = 0
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        cnt_image = cnt_image + len(items['images'])
    return (len(filenames), cnt_image)
def return_information(directory_seq, basedir, date, crawler_version,
                       seconddir="lists", thirddir="texts", debug=False):
    if debug:
        print "Start blog text crawling..."
    directory_seq = int(directory_seq)
    try:
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
            % (basedir, seconddir, directory_seq,
               int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError as e:
        print e
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            try:
                check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                    % (basedir, thirddir, directory_seq,
                       int(date[0:4]), int(date[5:7]), int(date[8:10]))
                check_filename = '%s.json' % (items[i]['logNo'])
                if not os.path.isfile('%s/%s' % (check_targetpath, check_filename)):
                    web_crawl(items[i]['blogId'], items[i]['logNo'],
                              items[i]['crawledTime'], crawler_version,
                              items[i]['title'], items[i]['writtenTime'],
                              items[i]['url'], items[i]['tags'], date,
                              directory_seq, basedir, debug=debug)
            except Exception as e:
                print e
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
        if len(filenames) == 0:
            print "You probably have to crawl lists first."
    else:
        print "Not all files read."
def original_count_blog_by_directory(directory_seq, date, basedir, seconddir='lists'):
    targetpath = target_path(directory_seq, date, basedir, seconddir)
    filenames = glob.glob('%s/*.json' % targetpath)
    cnt_lists_blog = 0
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        cnt_lists_blog = cnt_lists_blog + len(items)
    return cnt_lists_blog
def on_post_save(self, view):
    if utils.get_language() != "jsf":
        return
    window = sublime.active_window()
    folders = window.folders()
    if not folders:
        return
    folderProyecto = folders[0]
    if not os.path.exists(os.path.join(folderProyecto, "pom.xml")):
        return
    server = utils.get_preference("server")
    folderDeploy = server_folder_deploy[server]
    self.folderDeploy = folderDeploy
    filepath = utils.get_filepath()
    self.filepath = filepath
    if server == "weblogic":
        threading.Thread(target=self.reemplazarTodos).start()
        return
    if server != "jboss":
        folderDeploy = folderDeploy + os.sep + os.listdir(folderDeploy)[0]
        self.folderDeploy = folderDeploy
    folderDeploy = os.path.normpath(folderDeploy)
    print("the folder deploy is : " + folderDeploy)
    nombreProyecto = filepath.replace(folderProyecto + os.sep, "")
    nombreProyecto = nombreProyecto[:nombreProyecto.find(os.sep)]
    fileLocation = filepath[filepath.find("webapp" + os.sep) + 7:]
    print(server)
    print("el nombre del proyecto es : " + nombreProyecto)
    folders = os.listdir(folderDeploy)
    folders = [os.path.join(folderDeploy, x) for x in folders]

    def comparador(x):
        return os.path.getmtime(x)

    folders = sorted(folders, key=comparador, reverse=True)
    print(folders)
    for folderS in folders:
        for folder in os.listdir(folderS):
            print(folder)
            if folder.find(nombreProyecto) != -1:
                fileLocation = folderS + os.sep + folder + os.sep + fileLocation
                print("la nueva localizacion del archivo es : " + fileLocation)
                utils.file_write(fileLocation, utils.file_read(filepath))
                return
            else:
                print("no")
def plot_group(inputdir, outputdir, recall_pct):
    filecount = 0
    group = os.path.basename(inputdir)
    filepattern = os.path.join(inputdir, filebase + r'*')
    res = StructDict()
    for resultfile in glob.iglob(filepattern):
        # read in the results files per query size
        # get the query size from the end part of the file name
        qsize = os.path.basename(resultfile).split('.')[-1]
        qsize = int(qsize)
        lines = file_read(resultfile)
        # 1st line is min/max
        # 2nd line is 1st, 2nd, 3rd quartile
        minmax = [float(x) for x in lines[0].split()]
        quartiles = [float(x) for x in lines[1].split()]
        vals = StructDict(
            {'min': minmax[0], 'max': minmax[1],
             'quart1': quartiles[0], 'quart2': quartiles[1],
             'quart3': quartiles[2]})
        res[qsize] = vals
        filecount += 1
    # create an array of result values per query size
    qsizes = list(res.keys())
    qsizes.sort()
    print(qsizes)
    quartile_1 = [res[qsize]['quart1'] for qsize in qsizes]
    quartile_2 = [res[qsize]['quart2'] for qsize in qsizes]
    quartile_3 = [res[qsize]['quart3'] for qsize in qsizes]
    print(res)
    print(quartile_1)
    print(quartile_2)
    print(quartile_3)
    print("Dir {} processed {} files".format(group, filecount))
    # plot the array
    plt.plot(qsizes, quartile_1, color="gray")
    plt.plot(qsizes, quartile_2, color="black")
    plt.plot(qsizes, quartile_3, color="gray")
    plt.ylim(ymin=0)
    plt.grid(which='major', linewidth='0.5', color='darkgray')
    plt.grid(which='minor', linewidth='0.5', color='lightgray')
    plt.fill_between(qsizes, quartile_1, quartile_3, color="gray")
    plt.title('Processes {} genes'.format(group))
    plt.xlabel('Query size')
    plt.ylabel('Fold precision at {}% over random'.format(recall_pct))
    # plt.draw()
    # plt.pause(1)
    filename = "{}r{}.pdf".format(group, recall_pct)
    outputfile = os.path.join(outputdir, filename)
    plt.savefig(outputfile)
    resfile = os.path.join(outputdir, filename.replace('pdf', 'csv'))
    save_vals(res, resfile)
def plot_curve(inputdir, outputdir):
    filecount = 0
    res = StructDict()
    filepattern = os.path.join(inputdir, filebase + r'*')
    for resultfile in glob.iglob(filepattern):
        # read in the results files per recall depth
        # get the recall percent from the end part of the file name
        recall_pct = os.path.basename(resultfile).split('.')[-1]
        recall_pct = int(recall_pct)
        lines = file_read(resultfile)
        # 1st line is min/max
        # 2nd line is 1st, 2nd, 3rd quartile
        minmax = [float(x) for x in lines[0].split()]
        quartiles = [float(x) for x in lines[1].split()]
        vals = StructDict({
            'min': minmax[0], 'max': minmax[1],
            'quart1': quartiles[0], 'quart2': quartiles[1],
            'quart3': quartiles[2]
        })
        res[recall_pct] = vals
        filecount += 1
    # create an array of result values per recall depth
    recall_depths = list(res.keys())
    recall_depths.sort()
    print(recall_depths)
    quartile_1 = [res[depth]['quart1'] for depth in recall_depths]
    quartile_2 = [res[depth]['quart2'] for depth in recall_depths]
    quartile_3 = [res[depth]['quart3'] for depth in recall_depths]
    print(res)
    print(quartile_1)
    print(quartile_2)
    print(quartile_3)
    print("Processed {} files".format(filecount))
    # plot the array
    plt.plot(recall_depths, quartile_1, color="gray")
    plt.plot(recall_depths, quartile_2, color="black")
    plt.plot(recall_depths, quartile_3, color="gray")
    plt.ylim(ymin=0)
    plt.xscale('log')
    plt.grid(which='major', linewidth='0.5', color='darkgray')
    plt.grid(which='minor', linewidth='0.5', color='lightgray')
    plt.fill_between(recall_depths, quartile_1, quartile_3, color="gray")
    plt.title('Seek Search Performance')
    plt.xlabel('Recall Percent')
    plt.ylabel('Fold precision over random')
    # plt.draw()
    # plt.pause(1)
    filename = "precision_vs_depth.pdf"
    outputfile = os.path.join(outputdir, filename)
    plt.savefig(outputfile)
    resfile = os.path.join(outputdir, filename.replace('pdf', 'csv'))
    save_vals(res, resfile)
def _merge_seeds(self, seeds, values):
    """merge the main seed file with the recipe(s) and additional seeds,
    return it as a string"""
    result = ''
    for seed in seeds:
        file_name = os.path.join(self.cfg['paths']['seeds'], seed + '.seed')
        logging.info('applying template values to "%s"', file_name)
        data = utils.file_read(file_name)
        result += utils.apply_template(data, values, file_name)
    return result
def run(self, edit):
    archivos = utils.get_files({"folder": sublime.packages_path(), "ext": "py"})
    comandos = []
    for archivo in archivos:
        texto = utils.file_read(archivo)
        comandos += re.findall(r"class\s+([\w]+)\(sublime_plugin.TextCommand\):",
                               texto, flags=re.IGNORECASE)
    comandos = list(set(comandos))
    # print(comandos)
    for comando in comandos:
        self.generar_comando(comando)
def makePlot1aFilelists(golddir, resultdir, outputdir, genefile):
    golddir = utils.makeAbsolutePath(golddir)
    resultdir = utils.makeAbsolutePath(resultdir)
    outputdir = utils.makeAbsolutePath(outputdir)
    genefile = utils.makeAbsolutePath(genefile)
    # Note: query and gold standard files will be from the known good results dir,
    # in this case the 'golddir'
    # gscore files will be from the 'resultdir'
    filecount = 0
    filepattern = os.path.join(golddir, r'[0-9]*.query')
    for queryfile in glob.iglob(filepattern):
        filecount += 1
        # print(queryfile)
        # read file to find query size
        lines = utils.file_read(queryfile)
        assert len(lines) == 1
        num_genes = len(lines[0].split())
        outbasename = os.path.join(outputdir, 'filelist_q{}'.format(num_genes))
        # append the files to the appropriate file lists
        # query file list
        queryfilelist = outbasename + '.query'
        utils.file_appendline(queryfilelist, queryfile)
        # gold standard file list
        # - gold standard file is the genes that should be correlated with the query genes
        goldfilelist = outbasename + '.gold'
        utils.file_appendline(goldfilelist, re.sub('query$', 'gold', queryfile))
        # query results 'gscore' file list
        # - gscore are the resulting gene correlation scores from the SeekMiner query result
        gscorefilelist = outbasename + '.gscore'
        qbasename = os.path.basename(queryfile)
        gscorefile = os.path.join(resultdir, qbasename.replace('query', 'gscore'))
        utils.file_appendline(gscorefilelist, gscorefile)
        # include file list - genes to include in the results
        includefile = outbasename + '.include'
        utils.file_appendline(includefile, genefile)
        # exclude file list
        # - exclude all genes in the query file
        excludefile = outbasename + '.exclude'
        utils.file_appendline(excludefile, queryfile)
    if filecount == 0:
        print("No matching files found")
        exit(-1)
    print("Num files processed: {}".format(filecount))
def crearArchivo(self):
    archivo = self.opcion.split(":")
    print(archivo)
    self.lang = archivo[0].strip()
    lang = self.lang
    archivo = lang + "/" + archivo[1].strip() + "." + self.extension
    archivo = TEMPLATES_PATH + archivo
    print("la ubicacion del archivo es : " + archivo)
    self.text = utils.file_read(archivo)
    print(self.text)
    window = sublime.active_window()
    window.show_input_panel("File Name", "", self.pedirNombre, None, None)
def generate(self):
    """generate the pxe boot file"""
    self.pxe_variables.update({
        'config': self.config,
        'seeds': self.seeds,
        'seed_host': cfg['settings']['seed_host'],
        'seed_port': cfg['settings']['bottle_port'],
        'address': self.address,
        'overlay': self.overlay,
        'puppet_manifests': self.puppet,
        'host_name': self.host_name,
        'dns_domain': self.dns_domain,
        'fqdn': self.fqdn,
        'query': urllib.urlencode([('address', self.address)]),
        'date_generated': utils.date_time(),
        'date_disabled': '',
        'kernel': '%s/%s/%s' % ('seedbank', self.release, 'linux'),
        'initrd': '%s/%s/%s' % ('seedbank', self.release, 'initrd.gz')
    })
    if self.config:
        yaml_file = os.path.join(cfg['paths']['configs'], self.config)
        yaml_file = yaml_file + '.yaml'
        overrides = utils.yaml_read(yaml_file)
        if 'pxe' in overrides:
            cfg['pxe'].update(overrides['pxe'])
    values = cfg['pxe']
    self.pxe_variables.update(values)
    distribution = self.release.split('-')[0]
    file_name = cfg[distribution]['template_pxe']
    file_name = os.path.join(cfg['paths']['templates'], file_name)
    if not os.path.isfile(file_name):
        err = 'file "%s" does not exist (hint: check the templates '\
            'section in your settings)' % file_name
        raise utils.FatalException(err)
    pxe_variables_custom = []
    for variable in self.variables:
        key, value = variable
        pxe_variables_custom.append('# %s = %s' % (key, value))
        self.pxe_variables[key] = value
    pxe_variables_custom = '\n'.join(pxe_variables_custom)
    data = utils.file_read(file_name)
    data = utils.apply_template(data, self.pxe_variables, file_name)
    if pxe_variables_custom:
        data = re.sub(
            '(#\n# \*\*\* end - seedBank pxe variables \*\*\*)',
            pxe_variables_custom + '\n\\1', data)
    return data
def run(self, edit, **args):
    if not args.get("nombre"):
        return
    nombre = args.get("nombre")
    for c in os.listdir(TEMPLATES_PATH):
        # print(c)
        if nombre.lower() == c.lower()[:c.rfind(".")]:
            texto = utils.file_read(TEMPLATES_PATH + "/" + c)
            self.texto = texto
            if not utils.get_text().strip():
                self.insertar()
            else:
                # print("no tiene texto")
                self.texto = texto
                window = sublime.active_window()
                window.show_input_panel("", c[c.rfind("."):], self.crear_archivo, None, None)
def run(self, edit):
    d = {}
    modulos = utils.file_read(GO_API_FILE)
    lineas = modulos.splitlines()
    for linea in lineas:
        if linea:
            ocurrencias = re.findall(REGEX_FUNCION, linea, re.IGNORECASE)
            if ocurrencias:
                paquete = ocurrencias[0][0]
                if paquete.find("/") != -1:
                    paquete = paquete[paquete.find("/") + 1:]
                funcion = ocurrencias[0][1]
                if not d.get(paquete):
                    d[paquete] = []
                d[paquete].append(funcion)
    utils.save_json(GO_MAIN_MODULE, d)
    for key in d.keys():
        utils.save_json(GO_MODULES + key + ".json", d[key])
def original_count_blog_by_time(directory_seq, date, basedir, seconddir='lists'):
    time_cnt = {}
    for d in range(0, 2):
        tmp_date = datetime.strptime(date, '%Y-%m-%d') + timedelta(days=d)
        tmp_date = tmp_date.isoformat()
        targetpath = target_path(directory_seq, tmp_date, basedir, seconddir)
        filenames = glob.glob('%s/*.json' % targetpath)
        for i, filename in enumerate(filenames):
            items = file_read(filename)
            for j, item in enumerate(items):
                written_time = item['writtenTime']
                if int(written_time[8:10]) == int(date[8:10]):
                    key = int(written_time[11:13])
                    if key in time_cnt.keys():
                        time_cnt[key] += 1
                    else:
                        time_cnt[key] = 0
    return [time_cnt]
def return_information(directory_seq, basedir, date, crawler_version,
                       seconddir="lists", thirddir="comments", debug=False):
    directory_seq = int(directory_seq)
    try:
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
            % (basedir, seconddir, directory_seq,
               int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError:
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                % (basedir, thirddir, directory_seq,
                   int(date[0:4]), int(date[5:7]), int(date[8:10]))
            check_filename = '%s.json' % (items[i]['logNo'])
            if not os.path.isfile('%s/%s' % (check_targetpath, check_filename)):
                comment_crawl(items[i]['blogId'], items[i]['logNo'],
                              items[i]['writtenTime'], date, directory_seq,
                              basedir, crawler_version, debug=debug)
                time.sleep(0.1)
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
    else:
        print "Not all files read."
                       type=str, required=True,
                       help='input file with list of correlated genes')
argParser.add_argument('--group-list', '-l', type=str, required=True,
                       help='list of GO groups to pull out')
argParser.add_argument('--output-file', '-o', type=str, required=True,
                       help='output file of matching groups')
args = argParser.parse_args()

glist = file_read(args.group_list)
gmt = parse_gmt(args.gmt_file, 0, sys.maxsize)
gmt_dict = {}
for group in gmt:
    desc = group['desc']
    desc = desc.lower()
    desc = re.sub('__', '_', desc)
    gmt_dict[desc] = group

with open(args.output_file, 'w') as fp:
    count = 0
    for desc in glist:
        desc = desc.rstrip()
        desc = desc.lower()
def main(argv):
    if len(argv) == 0:
        print("""
Usage: python3 -m adapter config-file.ini ( stop | daemon )

""" + adapter_app + """, version """ + adapter_version + """

  config-file   Configuration file, see mcfeedadapter_example.conf for examples.
  action        Optional, one of the following:
    stop        Stop running adapter
    daemon      Start adapter as daemon
""")
        return 0

    print(adapter_app + ", version " + adapter_version + "\n")
    print("")

    args = readconf.parse_argv(argv)

    if cfg.action is not None:
        if cfg.action == 'daemon':
            utils.become_daemon()

    if not readconf.read_conf(args):
        return 1

    if not initialize_outputs():
        close_outputs()
        return 1

    current_pid = utils.file_read(cfg.pid_file)

    if cfg.action is not None:
        if (cfg.action == 'stop') or (cfg.action == 'status'):
            if current_pid is None:
                print("Adapter is not running\n")
                return 0
            process_id = int(current_pid)
            print("Adapter found, PID " + str(process_id))
            if cfg.action == 'stop':
                utils.remove_file(cfg.pid_file)
                while utils.is_process_running(process_id):
                    time.sleep(0.05)
                print("Adapter stopped\n")
            return 0

    if current_pid is not None:
        utils.print_error("Adapter for this feed is already running")
        return 1

    utils.file_write(cfg.pid_file, utils.get_pid())

    signal.signal(signal.SIGINT, term_signal_handler)
    signal.signal(signal.SIGTERM, term_signal_handler)

    current_pid = utils.file_read(cfg.pid_file)
    utils.log_write("Adapter started, PID: " + str(current_pid))

    while current_pid is not None:
        iteration_result = adapter_iteration()
        if cfg.action is not None:
            if cfg.action == 'term':
                utils.remove_file(cfg.pid_file)
                current_pid = None
        if current_pid is not None:
            if not iteration_result[0]:
                utils.print_error("Adapter encountered error when processing feed records and will be stopped")
                utils.remove_file(cfg.pid_file)
                return 1
            if not iteration_result[1]:
                time.sleep(0.1)
            current_pid = utils.file_read(cfg.pid_file)

    close_outputs()
    utils.log_write("Adapter stopped")
# -*-coding:utf-8-*-
import json
import os
import glob
import csv
from datetime import datetime, timedelta

from utils import checkdir, file_read, get_today, get_version

if __name__ == "__main__":
    num_directory_seq = 31
    basedir = "/home/web/public_html/data/naver-blog"
    seconddir = "statistics"
    targetpath = "%s/%s" % (basedir, seconddir)
    filenames = glob.glob("%s/*.json" % targetpath)
    cnt_files = len(filenames)
    table = [[0 for i in range(cnt_files)] for j in range(num_directory_seq + 2)]
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        print filename
        table[0][i] = filename.rsplit("statistics/", 1)[1].rsplit(".", 1)[0]
        for directory_seq in range(len(items)):
            table[directory_seq + 1][i] = items[directory_seq]["countTextsBlog"]
            table[num_directory_seq + 1][i] = table[num_directory_seq + 1][i] + table[directory_seq + 1][i]
    savefilename = "/home/web/public_html/data/naver-blog/test_file.csv"
    csvfile = open(savefilename, "w")
    wr = csv.writer(csvfile, dialect="excel")
    [wr.writerow(r) for r in table]
def main() -> None:
    prefix = "cache/"
    now = datetime.datetime.today()
    time_ym = now.strftime("%Y-%m")
    time_dmy = now.strftime("%d. %b %Y")
    semester = utils.json_read(prefix + "current_semester.json", None)
    semester = semester[0] + " " + semester[1]
    folder = "gh-pages/"

    pflicht: List[Tuple[str, str]] = []
    fields: Dict[str, Dict[str, Tuple[str, str]]] = {}
    pflicht = utils.json_read(prefix + "pre-tucan-pflicht.json", pflicht)
    fields = utils.json_read(prefix + "pre-inferno.json", fields)

    # nebenfach = utils.json_read("nebenfach.json")
    # back = utils.groupby(((course, major + " · " + category)
    #                       for major, v in nebenfach.items()
    #                       for category, v in v.items()
    #                       for module in v
    #                       for course in module), key=lambda x: x[0])
    # back = {k: ["Y Nebenfach · " + " &<br> ".join(i[1] for i in v), ""] for k, v in back}
    # fields = [back] + list(fields.values())
    # print(json.dumps(fields, indent=2))

    # dist/main.js with npm; code.orig.js without npm
    if os.path.exists("dist/main.js"):
        CODE_FILE = "dist/main.js"
    else:
        CODE_FILE = "code.orig.js"

    page_tmpl = utils.file_read("page.html")
    index_tmpl = utils.file_read("index.html")
    code_tmpl = utils.file_read(CODE_FILE)
    style_tmpl = utils.file_read("style.css")

    def filename(reg: str) -> str:
        return "".join(c for c in reg if c.isalnum())

    regulations = [
        (k,
         k.replace("B.Sc.", "Bachelor")
          .replace("M.Sc.", "Master")
          .replace(" (2015)", ""),
         filename(k) + ".html")
        for k in fields.keys() if k.endswith(" (2015)")
    ] + [
        # other FBs?
        ("BauUmwelt", "FB 13 Bau, Umwelt", "BauUmwelt.html")
    ]

    listy = [
        {'href': href, 'title': semester + " " + display_regulation}
        for regulation, display_regulation, href in regulations
        if display_regulation.endswith(" Informatik")
        if not display_regulation.startswith("FB ")
    ]
    experimentallist = [
        {'href': href, 'title': semester + " " + display_regulation}
        for regulation, display_regulation, href in regulations
        if not display_regulation.endswith(" Informatik")
        if not display_regulation.startswith("FB ")
    ]
    speciallist = [
        {'href': href, 'title': semester + " " + display_regulation}
        for regulation, display_regulation, href in regulations
        if display_regulation.startswith("FB ")
    ]
    index_data = {
        "list": listy,
        "experimentallist": experimentallist,
        "speciallist": speciallist,
    }
    utils.file_write(folder + "/index.html", stache(index_tmpl, index_data))
    utils.file_write(folder + "/main.js", code_tmpl)
    utils.file_write(folder + "/style.css", style_tmpl)
    print(regulations)

    for regulation, display_regulation, href in regulations:
        print(prefix + "-" + filename(regulation) + ".json")
        modules: Dict[str, Module] = {}
        modules = utils.json_read(prefix + "-" + filename(regulation) + ".json", modules)
        if modules == []:
            continue  # if file exists
        data = [clean(module_id, module, fields, regulation)
                for module_id, module in modules.items()]
        data.sort(key=lambda x: (x['category'], x['id']))  # -int(x['credits'])
        js_data = json.dumps(data, indent=1)
        page_data = {
            "today": time_dmy,
            "semester": semester,
            "regulation": display_regulation,
            "js_data": js_data,
            "content": generate_page(data)
        }
        utils.file_write(folder + "/" + href, stache(page_tmpl, page_data))
    print("finished")