def files_rank_correlation(fileA, fileB):
    """Compute a per-line Spearman rank correlation between two files.

    Both files must have the same number of lines.  For each line pair,
    tokens in file A are ranked by position; tokens missing from the
    corresponding line of file B are assigned a rank one past the end.
    Returns a list with one correlation coefficient per line.
    """
    lines_a = utils.file_read(fileA)
    lines_b = utils.file_read(fileB)

    assert len(lines_a) == len(lines_b)

    correlations = []
    for line_a, line_b in zip(lines_a, lines_b):
        # position of each token within its own line (later duplicates win)
        pos_a = {tok: idx for idx, tok in enumerate(line_a.split())}
        pos_b = {tok: idx for idx, tok in enumerate(line_b.split())}
        # tokens absent from B get a sentinel rank past the end of A
        sentinel = len(pos_a)
        ranks_a = [pos_a[tok] for tok in pos_a]
        ranks_b = [pos_b.get(tok, sentinel) for tok in pos_a]
        coeff, _ = stats.spearmanr(ranks_a, ranks_b)
        correlations.append(coeff)

    return correlations
Exemple #2
0
    def generate(self, path):
        """generate an overlay permissions file

        Walks the overlay tree rooted at *path*, records a
        (mode, uid, gid) triple per entry (with fixed modes for a few
        well-known paths), merges in values from an existing
        ``.permissions`` file, and writes the result prefixed by the
        permission-script header template.
        """
        perm_file = path + '.permissions'

        # collect every sub-directory and file under the overlay root,
        # skipping .gitignore placeholder files
        overlay_contents = []
        for root, dirs, files in os.walk(path):
            for sub_dir in dirs:
                overlay_contents.append(os.path.join(root, sub_dir))
            for file_name in files:
                if not file_name == '.gitignore':
                    overlay_contents.append(os.path.join(root, file_name))

        # map each entry to (mode, uid, gid); a few well-known target
        # paths get hard-coded secure modes
        perm_list = {}
        for entry in overlay_contents:
            stat = os.stat(entry)  # NOTE(review): shadows the stdlib 'stat' module name
            # NOTE(review): slicing oct() output assumes a fixed-width
            # prefix; oct() formatting differs between Python 2 and 3, and
            # the digit count varies with the file-type bits — verify this
            # yields the intended permission digits on this interpreter
            mode = int(oct(stat.st_mode)[3:])
            # strip the overlay root so entries become target-absolute paths
            entry = entry.split(path, 1)[1]
            if entry == '/root':
                perm_list[entry] = ('0700', 0, 0)
            elif entry.endswith('.ssh'):
                perm_list[entry] = ('0700', 0, 0)
            elif entry.endswith('authorized_keys'):
                perm_list[entry] = ('0700', 0, 0)
            elif entry.startswith('/usr/local/bin'):
                perm_list[entry] = ('0755', 0, 0)
            elif entry == '/etc/rc.local':
                perm_list[entry] = ('0755', 0, 0)
            else:
                perm_list[entry] = ('%04d' % mode, 0, 0)

        # values from a pre-existing permissions file override the
        # freshly computed ones (user edits win)
        if os.path.isfile(perm_file):
            data = utils.file_read(perm_file).split('\n')
            defined_list = {}
            for line in data:
                try:
                    # each data line is: mode<TAB>uid<TAB>gid<TAB>path
                    mode, uid, gid, real_path = line.split('\t')
                except ValueError:
                    pass  # skip malformed or empty lines
                else:
                    defined_list[real_path] = (mode, uid, gid)
            for real_path in perm_list:
                if real_path in defined_list:
                    perm_list[real_path] = defined_list[real_path]

        # write the header template followed by one tab-separated line
        # per entry: mode, uid, gid, path
        data = []
        header_file = os.path.join(self.cfg['paths']['templates'],
            self.cfg['templates']['permission_script'])
        header = utils.file_read(header_file)
        data.append(header.strip())
        for key, value in perm_list.items():
            data.append('%s\t%s\t%s\t%s' % (value[0], value[1], value[2], key))
        utils.file_write(perm_file, '\n'.join(data) + '\n')
Exemple #3
0
    def generate(self, path):
        """generate an overlay permissions file

        Walks the overlay tree rooted at *path*, records a
        (mode, uid, gid) triple per entry (with fixed modes for a few
        well-known paths), merges in values from an existing
        ``.permissions`` file, and writes the result prefixed by the
        permission-script header template.
        """
        perm_file = path + '.permissions'

        # collect every sub-directory and file under the overlay root,
        # skipping .gitignore placeholder files
        overlay_contents = []
        for root, dirs, files in os.walk(path):
            for sub_dir in dirs:
                overlay_contents.append(os.path.join(root, sub_dir))
            for file_name in files:
                if not file_name == '.gitignore':
                    overlay_contents.append(os.path.join(root, file_name))

        # map each entry to (mode, uid, gid); a few well-known target
        # paths get hard-coded secure modes
        perm_list = {}
        for entry in overlay_contents:
            # FIX: previously int(oct(st.st_mode)[3:]) — slicing the oct()
            # string is fragile (oct() formatting differs between Python 2
            # and 3 and the digit count varies with the file-type bits).
            # Mask the permission bits directly and format them in octal;
            # 0o7777 keeps setuid/setgid/sticky as well.
            st = os.stat(entry)  # renamed from 'stat' to avoid shadowing the stdlib module
            mode = st.st_mode & 0o7777
            # strip the overlay root so entries become target-absolute paths
            entry = entry.split(path, 1)[1]
            if entry == '/root':
                perm_list[entry] = ('0700', 0, 0)
            elif entry.endswith('.ssh'):
                perm_list[entry] = ('0700', 0, 0)
            elif entry.endswith('authorized_keys'):
                perm_list[entry] = ('0700', 0, 0)
            elif entry.startswith('/usr/local/bin'):
                perm_list[entry] = ('0755', 0, 0)
            elif entry == '/etc/rc.local':
                perm_list[entry] = ('0755', 0, 0)
            else:
                perm_list[entry] = ('%04o' % mode, 0, 0)

        # values from a pre-existing permissions file override the
        # freshly computed ones (user edits win)
        if os.path.isfile(perm_file):
            data = utils.file_read(perm_file).split('\n')
            defined_list = {}
            for line in data:
                try:
                    # each data line is: mode<TAB>uid<TAB>gid<TAB>path
                    mode, uid, gid, real_path = line.split('\t')
                except ValueError:
                    pass  # skip malformed or empty lines
                else:
                    defined_list[real_path] = (mode, uid, gid)
            for real_path in perm_list:
                if real_path in defined_list:
                    perm_list[real_path] = defined_list[real_path]

        # write the header template followed by one tab-separated line
        # per entry: mode, uid, gid, path
        data = []
        header_file = os.path.join(self.cfg['paths']['templates'],
                                   self.cfg['templates']['permission_script'])
        header = utils.file_read(header_file)
        data.append(header.strip())
        for key, value in perm_list.items():
            data.append('%s\t%s\t%s\t%s' % (value[0], value[1], value[2], key))
        utils.file_write(perm_file, '\n'.join(data) + '\n')
Exemple #4
0
	def recorrerCarpeta(self, archivo):
		"""Recursively walk *archivo*; every file whose basename equals
		self.nombreArchivo and whose parent folder equals
		self.nombreCarpeta is overwritten with the contents of
		self.filepath."""
		if os.path.isdir(archivo):
			for entrada in os.listdir(archivo):
				self.recorrerCarpeta(archivo + "/" + entrada)
			return
		nombre_coincide = os.path.basename(archivo) == self.nombreArchivo
		carpeta_coincide = os.path.basename(os.path.dirname(archivo)) == self.nombreCarpeta
		if nombre_coincide and carpeta_coincide:
			utils.file_write(archivo, utils.file_read(self.filepath))
Exemple #5
0
def pxe_variables(cfg, address):
    """Parse the seedBank pxe variables block out of a pxelinux.cfg file.

    Values that look like Python literals (lists, None, True, False) are
    evaluated with ast.literal_eval; everything else stays a string.
    Raises Exception when no pxelinux.cfg file exists for *address*.
    """
    file_name = os.path.join(cfg['paths']['tftpboot'], 'pxelinux.cfg', address)
    if not os.path.isfile(file_name):
        raise Exception('"%s" does not exist' % file_name)

    contents = utils.file_read(file_name)
    # grab the commented block between the start marker and the next '***'
    match = re.search(
        r'# \*\*\* start - seedBank pxe variables \*\*\*$(.*?)\*\*\*',
        contents, re.M | re.S)
    block = re.sub('(?m)^#', '', match.group(1).strip())
    # non-empty "key = value" lines, split on the first ' ='
    entries = [entry.strip() for entry in block.split('\n') if entry]
    tokens = [entry.split(' =', 1) for entry in entries]
    pairs = [(tok[0].strip(), tok[1].strip()) for tok in tokens]

    literals = ['None', 'False', 'True']
    result = {}
    for key, value in pairs:
        # evaluate list/None/bool literals, keep everything else verbatim
        if value.startswith('[') or value in literals:
            result[key] = ast.literal_eval(value)
        else:
            result[key] = value
    return result
Exemple #6
0
def puppet(manifest):
    """Render and return the seedbank puppet bootstrap manifest."""
    template = os.path.join(cfg['paths']['templates'],
                            cfg['templates']['puppet_manifest'])
    contents = utils.file_read(template)
    return utils.apply_template(contents, {'manifest': manifest}, template)
Exemple #7
0
def pxe_variables(cfg, address):
    """try to read the pxe variables from the pxelinux.cfg file

    Returns a dict mapping variable names to values; values that look
    like Python literals (lists, None, True, False) are evaluated with
    ast.literal_eval, everything else stays a string.  Raises Exception
    when no pxelinux.cfg file exists for *address*.
    """
    file_name = os.path.join(cfg['paths']['tftpboot'], 'pxelinux.cfg', address)
    if not os.path.isfile(file_name):
        raise Exception('"%s" does not exist' % file_name)

    data = utils.file_read(file_name)
    # grab the commented block between the start marker and the next '***'
    data = re.search(
        r'# \*\*\* start - seedBank pxe variables \*\*\*$(.*?)\*\*\*',
        data, re.M|re.S)
    data = data.group(1).strip()
    # strip the leading '#' from every line of the block
    data = re.sub('(?m)^#', '', data)
    # keep non-empty lines and split each into a (key, value) pair on ' ='
    data = [line.strip() for line in data.split('\n') if line]
    data = [line.split(' =', 1) for line in data]
    data = [(line[0].strip(), line[1].strip()) for line in data]

    lines = []
    values = ['None', 'False', 'True']
    for line in data:
        # evaluate list/None/bool literals, keep everything else verbatim
        if line[1].startswith('[') or line[1] in values:
            lines.append((line[0], ast.literal_eval(line[1])))
        else:
            lines.append(line)
    result = dict(lines)
    return result
Exemple #8
0
def fuzz_check(input_file, n_tests, to_randomize):
    """Validate the fuzzer's inputs and normalize them.

    Reads the seed data from *input_file*, checks that *n_tests* parses
    as a positive integer and that *to_randomize* is a float yielding a
    byte count within the seed size, reporting any problem via usage().
    Returns (input_seed, n_tests, n_bytes) where n_bytes is the number
    of seed bytes to randomize per test.
    """
    input_seed = utils.file_read(input_file)
    seed_len = len(input_seed)  # renamed from 'l' (ambiguous single letter, PEP 8 E741)
    print('\nInput size is', seed_len, 'bytes')

    try:
        n_tests = int(n_tests)
        if n_tests < 1:
            usage('N_TESTS should be > 0')
    except ValueError:
        usage('"' + str(n_tests) + '" is not a valid integer')
    print('Running test', n_tests, 'times')

    try:
        n_bytes = round(seed_len * float(to_randomize))
        if n_bytes < 0 or n_bytes > seed_len:
            usage('PART_TO_RANDOMIZE should be between 0 and 1')
    except ValueError:
        # FIX: wrap in str() like the integer branch — a non-string
        # to_randomize previously raised TypeError while building the message
        usage('"' + str(to_randomize) + '" is not a valid float')
    print('Randomizing', n_bytes, 'bytes\n')

    return input_seed, n_tests, n_bytes
Exemple #9
0
 def script(self, dst, overlay, prefix):
     """generate the permissions script which will be applied before the end
     of an installation

     Writes <dst>/fix_perms.sh with one chown and one chmod command per
     entry of the overlay's .permissions file.  When that file is missing
     an empty script is written and a warning is logged.
     """
     path = os.path.join(self.cfg['paths']['overlays'], overlay)
     perm_file = path + '.permissions'
     perm_script = os.path.join(dst, 'fix_perms.sh')
     if os.path.isfile(perm_file):
         data = utils.file_read(perm_file).strip()
         lines = data.split('\n')
         # '#' lines are comments in the permissions file
         lines = [line for line in lines if not line.startswith('#')]
         script = []
         for line in lines:
             try:
                 # each data line is: mode<TAB>uid<TAB>gid<TAB>path
                 mode, uid, gid, real_path = line.split('\t')
             except ValueError:
                 err = '%s is corrupt, delete or regenerate it with '\
                     'the "seedbank manage --overlay" command, or fix the '\
                     'file manually, line "%s" contains errors' % \
                     (perm_file, line)
                 raise utils.FatalException(err)
             else:
                 if prefix:
                     # re-root the target path under *prefix*
                     real_path = os.path.join(prefix, real_path[1:])
                     if real_path.endswith('.sb_template'):
                         # templates lose the .sb_template suffix at install time
                         real_path = os.path.splitext(real_path)[0]
                 script.append('chown %s:%s %s' % (uid, gid, real_path))
                 script.append('chmod %s %s' % (mode, real_path))
         utils.file_write(perm_script, '\n'.join(script))
     else:
         logging.warning('overlay "%s" has been selected but permission '
             'file "%s" does not exist, so all files will be owned by root '
             'and will keep the current permissons which could lead to '
             'problems', overlay, perm_file)
         utils.file_write(perm_script, '')
Exemple #10
0
def puppet(manifest):
    """Return the generated seedbank bootstrap init file."""
    name = os.path.join(
        cfg['paths']['templates'], cfg['templates']['puppet_manifest'])
    rendered = utils.apply_template(
        utils.file_read(name), {'manifest': manifest}, name)
    return rendered
Exemple #11
0
    def aplicarSonar(ruta):
        """Read the file at *ruta*, run quitCurlyAlones() over its text and
        write the result back in place.

        NOTE(review): defined at method indentation but takes no ``self``
        parameter — presumably used as a plain helper rather than called
        through an instance; confirm against the callers.
        """
        text=utils.file_read(ruta)
        text=quitCurlyAlones(text)
        utils.file_write(ruta, text)

                
                
Exemple #12
0
 def script(self, dst, overlay, prefix):
     """Write <dst>/fix_perms.sh from the overlay's .permissions file.

     Every data line produces a chown and a chmod command; when the
     permissions file is missing, an empty script is written and a
     warning is logged instead.
     """
     path = os.path.join(self.cfg['paths']['overlays'], overlay)
     perm_file = path + '.permissions'
     perm_script = os.path.join(dst, 'fix_perms.sh')

     if not os.path.isfile(perm_file):
         logging.warning(
             'overlay "%s" has been selected but permission '
             'file "%s" does not exist, so all files will be owned by root '
             'and will keep the current permissons which could lead to '
             'problems', overlay, perm_file)
         utils.file_write(perm_script, '')
         return

     raw = utils.file_read(perm_file).strip()
     # '#' lines are comments in the permissions file
     entries = [entry for entry in raw.split('\n')
                if not entry.startswith('#')]
     commands = []
     for entry in entries:
         try:
             # each data line is: mode<TAB>uid<TAB>gid<TAB>path
             mode, uid, gid, real_path = entry.split('\t')
         except ValueError:
             raise utils.FatalException(
                 '%s is corrupt, delete or regenerate it with '
                 'the "seedbank manage --overlay" command, or fix the '
                 'file manually, line "%s" contains errors'
                 % (perm_file, entry))
         if prefix:
             # re-root the target path under *prefix*
             real_path = os.path.join(prefix, real_path[1:])
             if real_path.endswith('.sb_template'):
                 # templates lose the .sb_template suffix at install time
                 real_path = os.path.splitext(real_path)[0]
         commands.append('chown %s:%s %s' % (uid, gid, real_path))
         commands.append('chmod %s %s' % (mode, real_path))
     utils.file_write(perm_script, '\n'.join(commands))
    def run(self, edit):
        """Rebuild the voice-command .bat files for the Jarvis integration.

        Collects command names from snippet JSON files, template files,
        TextCommand classes found in installed packages, and the user
        keymap; writes one .bat file per command plus a consolidated
        comandos.txt, then restarts the Jarvis process.
        NOTE(review): paths are hard-coded to d:/sublime3 — machine-specific.
        """
        paquete_snippets=sublime.packages_path()+os.sep+"snippets"
        lista=[]
        comandos=[]
        # collect snippet names from every snippets/*.json file
        for archivo in utils.get_files({"folder":paquete_snippets, "ext":"json"}):
            snip=utils.load_json(archivo)
            lista=lista + list(snip.keys())
        lista=list(set(lista))
        for snippet in lista:
            # normalize the snippet name into a command-safe identifier
            snippet=snippet.lower().replace("-", "_").replace("(", "").replace(")", "").replace(" ", "").replace("?", "").replace(":", "")
            utils.file_write(RUTA_COMANDOS+"code_"+snippet+".bat", "echo code_"+snippet+" > d:/sublime3/comando.txt")
            comandos.append("code_"+snippet)
        # one "make_<template>" command per template file (extension stripped)
        archivos_plantillas=utils.get_files({"folder":RUTA_PLANTILLAS})
        for plantilla in archivos_plantillas:
            plantilla=os.path.basename(plantilla)
            if plantilla.rfind(".")!=-1:plantilla=plantilla[:plantilla.rfind(".")]
            plantilla=plantilla.replace(" ", "_").lower()
            utils.file_write(RUTA_COMANDOS+"make_"+plantilla+".bat", "echo make_"+plantilla+" > d:/sublime3/comando.txt")
            comandos.append("make_"+plantilla)
        # scan package sources for TextCommand subclasses
        archivos_python=utils.get_files({"folder":sublime.packages_path(), "ext":".py"})
        for programa in archivos_python:
            rutaPrograma=programa
            try:programa=utils.file_read(programa)
            except:
                # some files cannot be read/decoded; report and skip them
                print("saco error al leer : "+rutaPrograma)
                continue
            comandosPython=re.findall("class ([\w]+)\(sublime_plugin.TextCommand\)",programa, re.IGNORECASE)
            for comandoPython in comandosPython:
                # CamelCase -> snake_case, then drop a trailing "_command"
                comandoPython=comandoPython[0].lower()+comandoPython[1:]
                cp=""
                for c in comandoPython:
                    if c.isupper():cp+="_"
                    cp+=c.lower()
                if cp.endswith("_command"):cp=cp.replace("_command", "")
                comandos.append(cp)
        # commands bound directly in the user keymap
        comandosInternos=utils.file_read("D:/sublime3/Data/Packages/User/Default (Windows).sublime-keymap")
        comandosInternos=re.findall('"command": *"(\w+)" *\}', comandosInternos, re.IGNORECASE)
        for comandoInterno in comandosInternos:comandos.append(comandoInterno)
        comandos=sorted(list(set(comandos)))
        strComandos=""
        for comando in comandos:strComandos+=comando+"\n"

        # dump the command list and restart Jarvis so it picks it up
        window=sublime.active_window()
        view=window.active_view()
        utils.file_write("d:/sublime3/comandos.txt", strComandos)
        view.run_command("ejecutar_comando", {"comando":"taskkill /f /im CustomizeableJarvis.exe\njarvis\nexit"})
        
Exemple #14
0
def rclocal(address):
    """Render the rc.local template with the host's pxe variables."""
    variables = settings.pxe_variables(cfg, address)
    template = os.path.join(cfg['paths']['templates'],
                            cfg['templates']['rc_local'])
    return utils.apply_template(utils.file_read(template), variables, template)
Exemple #15
0
def rclocal(address):
    """Return the rc.local file generated for *address*."""
    pxe_vars = settings.pxe_variables(cfg, address)
    template_path = os.path.join(
        cfg['paths']['templates'], cfg['templates']['rc_local'])
    contents = utils.file_read(template_path)
    contents = utils.apply_template(contents, pxe_vars, template_path)
    return contents
def original_count_comment(directory_seq, date, basedir, seconddir='comments'):
	"""Return the total number of comments across all JSON files in the
	target directory for *directory_seq* / *date*.

	Each JSON file is expected to contain a 'comments' list (as read by
	file_read()).  Returns 0 when no files match.
	"""
	targetpath = target_path(directory_seq, date, basedir, seconddir)
	filenames = glob.glob('%s/*.json' % targetpath)
	# sum() over a generator replaces the manual accumulator loop with an
	# unused enumerate index (and drops a stray tab inside the expression)
	return sum(len(file_read(name)['comments']) for name in filenames)
def count_blog_by_directory(directory_seq, date, basedir, seconddir='texts'):
	"""Return (file count, total image count) for the target directory.

	basedir example: /var/data/naver-blog/ — each matching JSON file is
	expected to contain an 'images' list (as read by file_read()).
	"""
	targetpath = target_path(directory_seq, date, basedir, seconddir)
	filenames = glob.glob('%s/*.json' % targetpath)
	# sum() over a generator replaces the manual accumulator loop with an
	# unused enumerate index
	cnt_image = sum(len(file_read(name)['images']) for name in filenames)
	return (len(filenames), cnt_image)
Exemple #18
0
def return_information(directory_seq,
                       basedir,
                       date,
                       crawler_version,
                       seconddir="lists",
                       thirddir="texts",
                       debug=False):
    """Crawl the blog texts referenced by the list JSON files for one
    directory/date.

    For every entry in <basedir>/<seconddir>/NN/YYYY/MM/DD/*.json, checks
    whether the corresponding text JSON already exists under *thirddir*
    and, if not, triggers web_crawl() for it.  Progress is reported on
    stdout.  (Python 2 syntax: print statements.)
    """
    if debug:
        print "Start blog text crawling..."
    directory_seq = int(directory_seq)
    try:
        # path layout: basedir/seconddir/NN/YYYY/MM/DD
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                         % (basedir, seconddir, directory_seq,\
                            int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError as e:
        print e
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            try:
                check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                                % (basedir, thirddir, directory_seq,\
                                   int(date[0:4]), int(date[5:7]), int(date[8:10]))
                check_filename = '%s.json' % (items[i]['logNo'])
                # only crawl entries whose text file does not exist yet
                if not os.path.isfile('%s/%s' %
                                      (check_targetpath, check_filename)):
                    web_crawl(items[i]['blogId'],
                              items[i]['logNo'],
                              items[i]['crawledTime'],
                              crawler_version,
                              items[i]['title'],
                              items[i]['writtenTime'],
                              items[i]['url'],
                              items[i]['tags'],
                              date,
                              directory_seq,
                              basedir,
                              debug=debug)
            except Exception as e:
                # NOTE(review): broad catch — a failed crawl only prints
                # the error and moves on to the next entry
                print e
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
        if len(filenames) == 0:
            print "You probably have to crawl lists first."
    else:
        print "Not all files read."
def original_count_blog_by_directory(directory_seq, date, basedir, seconddir='lists'):
	"""Return the total number of blog list entries across all JSON files
	in the target directory for *directory_seq* / *date*.

	Each JSON file is expected to contain a list of entries (as read by
	file_read()).  Returns 0 when no files match.
	"""
	targetpath = target_path(directory_seq, date, basedir, seconddir)
	filenames = glob.glob('%s/*.json' % targetpath)
	# sum() over a generator replaces the manual accumulator loop with an
	# unused enumerate index
	return sum(len(file_read(name)) for name in filenames)
Exemple #20
0
	def on_post_save(self, view):
		"""Sublime hook: after saving a file in a JSF maven project, copy it
		straight into the application server's deploy folder (hot deploy).

		weblogic is handled asynchronously via self.reemplazarTodos(); for
		other servers the newest deployed app folder matching the project
		name receives the file.  NOTE(review): deploy locations come from
		the server_folder_deploy mapping — machine-specific paths.
		"""
		if utils.get_language()!="jsf":return
		window=sublime.active_window()
		folders=window.folders()
		if not folders:return

		folderProyecto=folders[0]
		# only act inside maven projects (pom.xml at the workspace root)
		if not os.path.exists(os.path.join(folderProyecto, "pom.xml")):return
		server=utils.get_preference("server")

		folderDeploy=server_folder_deploy[server]
		self.folderDeploy=folderDeploy

		filepath=utils.get_filepath()
		self.filepath=filepath

		if server=="weblogic":
			# weblogic: replace files in a background thread and stop here
			threading.Thread(target=self.reemplazarTodos).start()
			return

		if server!="jboss":
			# non-jboss servers keep deployed apps one level deeper
			folderDeploy=folderDeploy+os.sep+os.listdir(folderDeploy)[0]
			self.folderDeploy=folderDeploy

			folderDeploy=os.path.normpath(folderDeploy)
			print("the folder deploy is : "+folderDeploy)
		# project name = first path component below the workspace folder
		nombreProyecto=filepath.replace(folderProyecto+os.sep, "")
		#print("el nombre del proyceto es : "+nombreProyecto)
		nombreProyecto=nombreProyecto[:nombreProyecto.find(os.sep)]
		#print("el nuevo nombre del proyecto es: "+nombreProyecto)
		#print("el filepath es : "+filepath)
		#print("el folderDeploy es : "+folderDeploy)
		# file location relative to the webapp folder ("webapp" + sep = 7 chars)
		fileLocation=filepath[filepath.find("webapp"+os.sep)+7:]
		#print("el fileLocation is: "+fileLocation)
		print(server)


		print("el nombre del proyecto es : "+nombreProyecto)
		folders=os.listdir(folderDeploy)

		folders=[os.path.join(folderDeploy, x) for x in folders]

		# sort deployments newest-first by modification time
		def comparador(x):return os.path.getmtime(x)

		folders=sorted(folders, key=comparador, reverse=True)
		print(folders)
		for folderS in folders:
			for folder in os.listdir(folderS):
				print(folder)
				if folder.find(nombreProyecto)!=-1:
					# found the deployed app — copy the saved file into it
					fileLocation=folderS+os.sep+folder+os.sep+fileLocation
					print("la nueva localizacion del archivo es : "+fileLocation)
					utils.file_write(fileLocation, utils.file_read(filepath))
					#print("escrito con exito")
					return
				else:print("no")
Exemple #21
0
def plot_group(inputdir, outputdir, recall_pct):
    """Plot quartile fold-precision curves per query size for one gene group.

    Reads the result files matching <inputdir>/<filebase>* (filebase is a
    module-level constant), collects min/max and quartile values keyed by
    query size, plots quartiles 1-3 against query size, and saves a PDF
    plus a CSV of the raw values into *outputdir*.  *recall_pct* is only
    used for the axis label and the output file name.
    """
    filecount = 0
    group = os.path.basename(inputdir)
    filepattern = os.path.join(inputdir, filebase + r'*')
    res = StructDict()
    for resultfile in glob.iglob(filepattern):
        # read in the results files per query size
        # get the query size from the end part of the file name
        qsize = os.path.basename(resultfile).split('.')[-1]
        qsize = int(qsize)
        lines = file_read(resultfile)
        # 1st line is min/max
        # 2nd line is 1st, 2nd, 3rd quartile
        minmax = [float(x) for x in lines[0].split()]
        quartiles = [float(x) for x in lines[1].split()]
        vals = StructDict(
                {'min': minmax[0],
                 'max': minmax[1],
                 'quart1': quartiles[0],
                 'quart2': quartiles[1],
                 'quart3': quartiles[2]}
                )
        res[qsize] = vals
        filecount += 1

    # create an array of result values per query size (sorted ascending)
    qsizes = list(res.keys())
    qsizes.sort()
    print(qsizes)
    quartile_1 = [res[qsize]['quart1'] for qsize in qsizes]
    quartile_2 = [res[qsize]['quart2'] for qsize in qsizes]
    quartile_3 = [res[qsize]['quart3'] for qsize in qsizes]
    print(res)
    print(quartile_1)
    print(quartile_2)
    print(quartile_3)
    print("Dir {} processed {} files".format(group, filecount))

    # plot the array: median in black, 1st/3rd quartiles in gray with fill
    plt.plot(qsizes, quartile_1, color="gray")
    plt.plot(qsizes, quartile_2, color="black")
    plt.plot(qsizes, quartile_3, color="gray")
    plt.ylim(ymin=0)
    plt.grid(which='major', linewidth='0.5', color='darkgray')
    plt.grid(which='minor', linewidth='0.5', color='lightgray')
    plt.fill_between(qsizes, quartile_1, quartile_3, color="gray")
    plt.title('Processes {} genes'.format(group))
    plt.xlabel('Query size')
    plt.ylabel('Fold precision at {}% over random'.format(recall_pct))
    # plt.draw()
    # plt.pause(1)
    filename = "{}r{}.pdf".format(group, recall_pct)
    outputfile = os.path.join(outputdir, filename)
    plt.savefig(outputfile)
    # also dump the raw values next to the figure as CSV
    resfile = os.path.join(outputdir, filename.replace('pdf', 'csv'))
    save_vals(res, resfile)
Exemple #22
0
def plot_curve(inputdir, outputdir):
    """Plot quartile fold-precision against recall depth (log x-axis).

    Reads the result files matching <inputdir>/<filebase>* (filebase is a
    module-level constant), collects min/max and quartile values keyed by
    recall percentage, plots quartiles 1-3, and saves a PDF plus a CSV of
    the raw values into *outputdir*.
    """
    filecount = 0
    res = StructDict()
    filepattern = os.path.join(inputdir, filebase + r'*')
    for resultfile in glob.iglob(filepattern):
        # read in the results files per query size
        # get the query size from the end part of the file name
        recall_pct = os.path.basename(resultfile).split('.')[-1]
        recall_pct = int(recall_pct)
        lines = file_read(resultfile)
        # 1st line is min/max
        # 2nd line is 1st, 2nd, 3rd quartile
        minmax = [float(x) for x in lines[0].split()]
        quartiles = [float(x) for x in lines[1].split()]
        vals = StructDict({
            'min': minmax[0],
            'max': minmax[1],
            'quart1': quartiles[0],
            'quart2': quartiles[1],
            'quart3': quartiles[2]
        })
        res[recall_pct] = vals
        filecount += 1

    # create an array of result values per recall depth (sorted ascending)
    recall_depths = list(res.keys())
    recall_depths.sort()
    print(recall_depths)
    quartile_1 = [res[depth]['quart1'] for depth in recall_depths]
    quartile_2 = [res[depth]['quart2'] for depth in recall_depths]
    quartile_3 = [res[depth]['quart3'] for depth in recall_depths]
    print(res)
    print(quartile_1)
    print(quartile_2)
    print(quartile_3)
    print("Processed {} files".format(filecount))

    # plot the array: median in black, 1st/3rd quartiles in gray with fill
    plt.plot(recall_depths, quartile_1, color="gray")
    plt.plot(recall_depths, quartile_2, color="black")
    plt.plot(recall_depths, quartile_3, color="gray")
    plt.ylim(ymin=0)
    plt.xscale('log')
    plt.grid(which='major', linewidth='0.5', color='darkgray')
    plt.grid(which='minor', linewidth='0.5', color='lightgray')
    plt.fill_between(recall_depths, quartile_1, quartile_3, color="gray")
    plt.title('Seek Search Performance')
    plt.xlabel('Recall Percent')
    plt.ylabel('Fold precision over random')
    # plt.draw()
    # plt.pause(1)
    filename = "precision_vs_depth.pdf"
    outputfile = os.path.join(outputdir, filename)
    plt.savefig(outputfile)
    # also dump the raw values next to the figure as CSV
    resfile = os.path.join(outputdir, filename.replace('pdf', 'csv'))
    save_vals(res, resfile)
Exemple #23
0
 def _merge_seeds(self, seeds, values):
     """Render every seed file with *values* and return the concatenated
     result as a single string (main seed + recipes + additional seeds)."""
     rendered = []
     for seed in seeds:
         seed_file = os.path.join(self.cfg['paths']['seeds'], seed + '.seed')
         logging.info('applying template values to "%s"', seed_file)
         rendered.append(utils.apply_template(utils.file_read(seed_file),
                                              values, seed_file))
     return ''.join(rendered)
Exemple #24
0
    def run(self, edit):
        """Scan every Python file in the packages folder for TextCommand
        subclasses and generate a command for each distinct one found."""
        rutas = utils.get_files({"folder":sublime.packages_path(), "ext":"py"})
        encontrados = []
        for ruta in rutas:
            contenido = utils.file_read(ruta)
            encontrados += re.findall("class\s+([\w]+)\(sublime_plugin.TextCommand\):", contenido, flags=re.IGNORECASE)
        # deduplicate before generating
        for comando in list(set(encontrados)):
            self.generar_comando(comando)
Exemple #25
0
def makePlot1aFilelists(golddir, resultdir, outputdir, genefile):
    """Build the per-query-size file lists consumed by the plot-1a step.

    For every <N>.query file in *golddir*, appends the matching query,
    gold, gscore, include and exclude entries to the filelist_q<size>.*
    files in *outputdir* (size = number of genes in the query).  Exits
    the process when no query files are found.
    """
    golddir = utils.makeAbsolutePath(golddir)
    resultdir = utils.makeAbsolutePath(resultdir)
    outputdir = utils.makeAbsolutePath(outputdir)
    genefile = utils.makeAbsolutePath(genefile)

    # Note: query and gold standard files will be from the known good results dir
    #  in this case the 'golddir'
    # gscore files will be from the 'resultdir'

    filecount = 0
    filepattern = os.path.join(golddir, r'[0-9]*.query')
    for queryfile in glob.iglob(filepattern):
        filecount += 1
        # print(queryfile)
        # read file to find query size (one whitespace-separated line)
        lines = utils.file_read(queryfile)
        assert len(lines) == 1
        num_genes = len(lines[0].split())
        outbasename = os.path.join(outputdir, 'filelist_q{}'.format(num_genes))

        # append the files to the appropriate file lists
        # query file list
        queryfilelist = outbasename + '.query'
        utils.file_appendline(queryfilelist, queryfile)

        # gold standard file list
        #  - gold standard file is the genes that should be correlated with the query genes
        goldfilelist = outbasename + '.gold'
        utils.file_appendline(goldfilelist, re.sub('query$', 'gold',
                                                   queryfile))

        # query results 'gscore' file list
        #  - gscore are the resulting gene correlation scores from the SeekMiner query result
        gscorefilelist = outbasename + '.gscore'
        qbasename = os.path.basename(queryfile)
        gscorefile = os.path.join(resultdir,
                                  qbasename.replace('query', 'gscore'))
        utils.file_appendline(gscorefilelist, gscorefile)

        # include file list - genes to include in the results
        includefile = outbasename + '.include'
        utils.file_appendline(includefile, genefile)

        # excluse file list
        # exclude all genes in the query file
        excludefile = outbasename + '.exclude'
        utils.file_appendline(excludefile, queryfile)

    if filecount == 0:
        print("No matching files found")
        # NOTE(review): exit() comes from the site module; sys.exit() is
        # the conventional choice outside the interactive interpreter
        exit(-1)

    print("Num files processed: {}".format(filecount))
Exemple #26
0
 def _merge_seeds(self, seeds, values):
     """Merge the main seed file with the recipe(s) and additional seeds;
     return the merged content as one string."""
     result = ''
     for seed_name in seeds:
         seed_path = os.path.join(self.cfg['paths']['seeds'],
                                  seed_name + '.seed')
         logging.info('applying template values to "%s"', seed_path)
         contents = utils.file_read(seed_path)
         result += utils.apply_template(contents, values, seed_path)
     return result
Exemple #27
0
 def crearArchivo(self):
     """Load the template named by self.opcion ("lang: name"), store its
     text in self.text/self.lang, and prompt for the new file's name."""
     partes = self.opcion.split(":")
     print(partes)
     self.lang = partes[0].strip()
     ruta = self.lang + "/" + partes[1].strip() + "." + self.extension
     ruta = TEMPLATES_PATH + ruta
     print("la ubicacion del archivo es : " + ruta)
     self.text = utils.file_read(ruta)
     print(self.text)
     ventana = sublime.active_window()
     ventana.show_input_panel("File Name", "", self.pedirNombre, None, None)
Exemple #28
0
    def generate(self):
        """generate the pxe boot file

        Merges the default pxe variables with optional per-host yaml
        overrides and custom variables, renders the distribution's pxe
        template, and returns the resulting file contents as a string.
        Raises utils.FatalException when the template is missing.
        NOTE(review): urllib.urlencode is Python 2 API — confirm the
        target interpreter.
        """
        self.pxe_variables.update({
            'config': self.config,
            'seeds': self.seeds,
            'seed_host': cfg['settings']['seed_host'],
            'seed_port': cfg['settings']['bottle_port'],
            'address': self.address,
            'overlay': self.overlay,
            'puppet_manifests': self.puppet,
            'host_name': self.host_name,
            'dns_domain': self.dns_domain,
            'fqdn': self.fqdn,
            'query': urllib.urlencode([('address', self.address)]),
            'date_generated': utils.date_time(),
            'date_disabled': '',
            'kernel': '%s/%s/%s' % ('seedbank', self.release, 'linux'),
            'initrd': '%s/%s/%s' % ('seedbank', self.release, 'initrd.gz')
        })

        # optional per-host yaml config may override the 'pxe' section
        if self.config:
            yaml_file = os.path.join(cfg['paths']['configs'], self.config)
            yaml_file = yaml_file + '.yaml'
            overrides = utils.yaml_read(yaml_file)
            if 'pxe' in overrides:
                cfg['pxe'].update(overrides['pxe'])
        values = cfg['pxe']
        self.pxe_variables.update(values)

        # template is selected per distribution (release prefix)
        distribution = self.release.split('-')[0]
        file_name = cfg[distribution]['template_pxe']
        file_name = os.path.join(cfg['paths']['templates'], file_name)
        if not os.path.isfile(file_name):
            err = 'file "%s" does not exist (hint: check the templates '\
                'section in your settings)' % file_name
            raise utils.FatalException(err)

        # custom variables are recorded as '# key = value' comment lines so
        # they can later be parsed back by pxe_variables()
        pxe_variables_custom = []
        for variable in self.variables:
            key, value = variable
            pxe_variables_custom.append('# %s = %s' % (key, value))
            self.pxe_variables[key] = value
        pxe_variables_custom = '\n'.join(pxe_variables_custom)

        data = utils.file_read(file_name)
        data = utils.apply_template(data, self.pxe_variables, file_name)
        # inject the custom variables just before the end marker
        if pxe_variables_custom:
            data = re.sub(
                '(#\n# \*\*\* end - seedBank pxe variables \*\*\*)',
                pxe_variables_custom + '\n\\1', data)
        return data
def return_information(directory_seq, basedir, date, crawler_version, seconddir ="lists", thirddir="texts", debug=False):
    if debug:
        print "Start blog text crawling..."
    directory_seq = int(directory_seq)
    try:
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                         % (basedir, seconddir, directory_seq,\
                            int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError as e:
        print e
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            try:
                check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                                % (basedir, thirddir, directory_seq,\
                                   int(date[0:4]), int(date[5:7]), int(date[8:10]))
                check_filename = '%s.json' % (items[i]['logNo'])
                if not os.path.isfile('%s/%s' % (check_targetpath, check_filename)):
                    web_crawl(items[i]['blogId'],
                              items[i]['logNo'],
                              items[i]['crawledTime'],
                              crawler_version,
                              items[i]['title'],
                              items[i]['writtenTime'],
                              items[i]['url'],
                              items[i]['tags'],
                              date,
                              directory_seq,
                              basedir, debug=debug)
            except Exception as e:
                print e
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
        if len(filenames)==0:
            print "You probably have to crawl lists first."
    else:
        print "Not all files read."
    def run(self, edit, **args):
        """Load the template whose base name matches ``args['nombre']``.

        Scans TEMPLATES_PATH case-insensitively; on a match, inserts the
        template text when the current view is empty, otherwise opens an
        input panel (pre-filled with the template's extension) that feeds
        ``self.crear_archivo``.
        """
        template_name = args.get("nombre")
        if not template_name:
            return
        wanted = template_name.lower()
        for entry in os.listdir(TEMPLATES_PATH):
            # Compare against the entry name with its extension stripped.
            if wanted == entry.lower()[:entry.rfind(".")]:
                contents = utils.file_read(TEMPLATES_PATH + "/" + entry)
                self.texto = contents
                if not utils.get_text().strip():
                    self.insertar()
                else:
                    self.texto = contents
                    panel_window = sublime.active_window()
                    panel_window.show_input_panel("", entry[entry.rfind("."):], self.crear_archivo, None, None)
    def run(self, edit):
        """Index the Go API listing into per-package JSON function lists.

        Parses GO_API_FILE line by line with REGEX_FUNCION, groups matched
        function names by package (dropping everything up to the first "/"),
        then writes one combined JSON file plus one file per package.
        """
        functions_by_package = {}
        api_text = utils.file_read(GO_API_FILE)
        for line in api_text.splitlines():
            if not line:
                continue
            matches = re.findall(REGEX_FUNCION, line, re.IGNORECASE)
            if not matches:
                continue
            package = matches[0][0]
            # Strip any path prefix before the first slash,
            # e.g. "encoding/json" -> "json".
            slash = package.find("/")
            if slash != -1:
                package = package[slash + 1:]
            function = matches[0][1]
            functions_by_package.setdefault(package, []).append(function)

        utils.save_json(GO_MAIN_MODULE, functions_by_package)
        for package in functions_by_package.keys():
            utils.save_json(GO_MODULES + package + ".json", functions_by_package[package])
def original_count_blog_by_time(directory_seq, date, basedir, seconddir='lists'):
	"""Count blog posts written on *date*, bucketed by hour of day.

	Scans the list files for *date* and the following day (presumably to
	catch entries filed under the next day's directory — confirm against
	the crawler layout), keeps entries whose ``writtenTime`` day-of-month
	matches *date*, and tallies them by the hour of ``writtenTime``.

	Returns a single-element list containing the ``{hour: count}`` dict,
	preserving the original return shape.
	"""
	time_cnt = {}
	for day_offset in range(0, 2):
		tmp_date = datetime.strptime(date, '%Y-%m-%d') + timedelta(days=day_offset)
		tmp_date = tmp_date.isoformat()

		targetpath = target_path(directory_seq, tmp_date, basedir, seconddir)
		for filename in glob.glob('%s/*.json' % targetpath):
			items = file_read(filename)
			for item in items:
				written_time = item['writtenTime']
				# Keep only entries actually written on the requested day.
				if int(written_time[8:10]) == int(date[8:10]):
					hour = int(written_time[11:13])
					# BUG FIX: the first post seen for an hour was recorded as
					# 0 instead of 1, undercounting every bucket by one.
					time_cnt[hour] = time_cnt.get(hour, 0) + 1
	return [time_cnt]
Exemple #33
0
def return_information(directory_seq, basedir, date, crawler_version,\
                       seconddir ="lists", thirddir="comments", debug=False):
    directory_seq = int(directory_seq)
    try:
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                         % (basedir, seconddir, directory_seq,\
                            int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError:
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                            % (basedir, thirddir, directory_seq,\
                               int(date[0:4]), int(date[5:7]), int(date[8:10]))
            check_filename = '%s.json' % (items[i]['logNo'])
            if not os.path.isfile('%s/%s' %
                                  (check_targetpath, check_filename)):
                comment_crawl(items[i]['blogId'],
                              items[i]['logNo'],
                              items[i]['writtenTime'],
                              date,
                              directory_seq,
                              basedir,
                              crawler_version,
                              debug=debug)
                time.sleep(0.1)
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
    else:
        print "Not all files read."
def return_information(directory_seq, basedir, date, crawler_version,\
                       seconddir ="lists", thirddir="comments", debug=False):
    directory_seq = int(directory_seq)
    try:
        targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                         % (basedir, seconddir, directory_seq,\
                            int(date[0:4]), int(date[5:7]), int(date[8:10]))
    except TypeError:
        raise Exception('Please check input values (ex: the date)')
    itr1 = 0
    filenames = glob.glob('%s/*.json' % targetpath)
    for filename in filenames:
        print filename
        items = file_read(filename)
        itr2 = 0
        for i, blog in enumerate(items):
            check_targetpath = '%s/%s/%02d/%s/%02d/%02d'\
                            % (basedir, thirddir, directory_seq,\
                               int(date[0:4]), int(date[5:7]), int(date[8:10]))
            check_filename = '%s.json' % (items[i]['logNo'])
            if not os.path.isfile('%s/%s' % (check_targetpath, check_filename)):
                comment_crawl(items[i]['blogId'],
                              items[i]['logNo'],
                              items[i]['writtenTime'],
                              date, directory_seq, basedir, crawler_version, debug=debug)
                time.sleep(0.1)
            itr2 += 1
        if itr2 == len(items):
            print "%s items read completed successfully." % len(items)
        else:
            print "Not all items read."
        itr1 += 1
    if len(filenames) == itr1:
        print "%s files read completed successfully." % len(filenames)
    else:
        print "Not all files read."
Exemple #35
0
                           type=str,
                           required=True,
                           help='input file with list of correlated genes')
    argParser.add_argument('--group-list',
                           '-l',
                           type=str,
                           required=True,
                           help='list of GO groups to pull out')
    argParser.add_argument('--output-file',
                           '-o',
                           type=str,
                           required=True,
                           help='output file of matching groups')
    args = argParser.parse_args()

    glist = file_read(args.group_list)

    gmt = parse_gmt(args.gmt_file, 0, sys.maxsize)

    gmt_dict = {}
    for group in gmt:
        desc = group['desc']
        desc = desc.lower()
        desc = re.sub('__', '_', desc)
        gmt_dict[desc] = group

    with open(args.output_file, 'w') as fp:
        count = 0
        for desc in glist:
            desc = desc.rstrip()
            desc = desc.lower()
Exemple #36
0
def main(argv):
    """Entry point for the feed adapter command-line tool.

    With no arguments, prints usage and exits.  Otherwise: parses arguments,
    optionally daemonizes, reads the configuration, initializes outputs,
    then either handles a 'stop'/'status' action against a running instance
    or claims the PID file and runs the processing loop around
    adapter_iteration() until the PID file disappears.

    Returns 0 on success, 1 on error.
    """

    if len(argv) == 0:
        print ("""\nUsage: python3 -m adapter config-file.ini ( stop | daemon )

""" + adapter_app +""", version """ + adapter_version +"""

  config-file               Configuration file, see mcfeedadapter_example.conf for examples.
  action                    Optional, one of the following:
      stop                  Stop running adapter
      daemon                Start adapter as daemon
""")
        return 0

    print (adapter_app + """, version """ + adapter_version + "\n")
    print ("")

    # NOTE(review): cfg.action appears to be populated by parse_argv —
    # daemonization intentionally happens before the config file is read.
    args=readconf.parse_argv(argv)
    if cfg.action is not None:
        if cfg.action == 'daemon':
            utils.become_daemon();

    if not readconf.read_conf(args):
        return 1;

    if not initialize_outputs():
        close_outputs()
        return 1;

    # The PID file doubles as the "is an adapter already running?" flag.
    current_pid=utils.file_read(cfg.pid_file)

    if cfg.action is not None:
        if (cfg.action == 'stop') or (cfg.action == 'status'):
            if current_pid is None:
                print("Adapter is not running\n")
                return 0
            process_id=int(current_pid)
            print("Adapter found, PID " + str(process_id))
            if cfg.action == 'stop':
                # Removing the PID file asks the running instance to exit;
                # busy-wait until its process has actually gone away.
                utils.remove_file(cfg.pid_file)
                while utils.is_process_running(process_id):
                    time.sleep(0.05)
                print("Adapter stopped\n")
            return 0

    if current_pid is not None:
        utils.print_error("Adapter for this feed is already running")
        return 1


    # Claim the PID file for this instance.
    utils.file_write(cfg.pid_file,utils.get_pid())

    signal.signal(signal.SIGINT,term_signal_handler)
    signal.signal(signal.SIGTERM,term_signal_handler)

    current_pid=utils.file_read(cfg.pid_file)
    utils.log_write("Adapter started, PID: " + str(current_pid))
    # Main loop: keep iterating while our PID file exists.  A 'term' action
    # (presumably set by term_signal_handler — confirm) or an external
    # 'stop' removing the file ends the loop cleanly.
    while current_pid is not None:
        iteration_result=adapter_iteration()
        if cfg.action is not None:
            if cfg.action == 'term':
                utils.remove_file(cfg.pid_file)
                current_pid = None

        if current_pid is not None:
            # iteration_result[0]: success flag; a failure stops the adapter.
            if not iteration_result[0]:
                utils.print_error("Adapter encountered error when processing feed records and will be stopped")
                utils.remove_file(cfg.pid_file)
                return 1

            # iteration_result[1] False means nothing was processed;
            # back off briefly before polling again.
            if not iteration_result[1]:
                time.sleep(0.1)

        current_pid=utils.file_read(cfg.pid_file)

    close_outputs()
    utils.log_write("Adapter stopped")
Exemple #37
0
    def generate(self):
        """Render the PXE boot file for this host.

        Assembles the template variables (host identity, seed endpoints,
        kernel/initrd paths), applies optional per-config YAML overrides to
        the 'pxe' defaults, renders the distribution's PXE template, and
        splices any custom variables in just before the end-of-variables
        marker.  Raises utils.FatalException if the template is missing.
        """
        self.pxe_variables.update({
            'config': self.config,
            'seeds': self.seeds,
            'seed_host': cfg['settings']['seed_host'],
            'seed_port': cfg['settings']['bottle_port'],
            'address': self.address,
            'overlay': self.overlay,
            'puppet_manifests': self.puppet,
            'host_name': self.host_name,
            'dns_domain': self.dns_domain,
            'fqdn': self.fqdn,
            'query': urllib.urlencode([('address', self.address)]),
            'date_generated': utils.date_time(),
            'date_disabled': '',
            'kernel': '%s/%s/%s' % ('seedbank', self.release, 'linux'),
            'initrd': '%s/%s/%s' % ('seedbank', self.release, 'initrd.gz')
        })

        # Per-config YAML overrides may replace any of the default pxe values.
        if self.config:
            yaml_file = os.path.join(cfg['paths']['configs'], self.config)
            yaml_file = yaml_file + '.yaml'
            overrides = utils.yaml_read(yaml_file)
            if 'pxe' in overrides:
                cfg['pxe'].update(overrides['pxe'])
        self.pxe_variables.update(cfg['pxe'])

        # Template is selected by the distribution part of the release name.
        distribution = self.release.split('-')[0]
        template = os.path.join(cfg['paths']['templates'],
                                cfg[distribution]['template_pxe'])
        if not os.path.isfile(template):
            err = 'file "%s" does not exist (hint: check the templates '\
                'section in your settings)' % template
            raise utils.FatalException(err)

        # Record custom variables both as comment lines and as substitutions.
        custom_lines = []
        for key, value in self.variables:
            custom_lines.append('# %s = %s' % (key, value))
            self.pxe_variables[key] = value
        custom_block = '\n'.join(custom_lines)

        data = utils.file_read(template)
        data = utils.apply_template(data, self.pxe_variables, template)
        if custom_block:
            data = re.sub('(#\n# \*\*\* end - seedBank pxe variables \*\*\*)',
                          custom_block + '\n\\1', data)
        return data
# -*-coding:utf-8-*-
import json
import os
import glob
import csv

from datetime import datetime, timedelta

from utils import checkdir, file_read, get_today, get_version

if __name__ == "__main__":

    num_directory_seq = 31
    basedir = "/home/web/public_html/data/naver-blog"
    seconddir = "statistics"
    targetpath = "%s/%s" % (basedir, seconddir)
    filenames = glob.glob("%s/*.json" % targetpath)
    cnt_files = len(filenames)
    table = [[0 for i in range(cnt_files)] for j in range(num_directory_seq + 2)]
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        print filename
        table[0][i] = filename.rsplit("statistics/", 1)[1].rsplit(".", 1)[0]
        for directory_seq in range(len(items)):
            table[directory_seq + 1][i] = items[directory_seq]["countTextsBlog"]
            table[num_directory_seq + 1][i] = table[num_directory_seq + 1][i] + table[directory_seq + 1][i]
    savefilename = "/home/web/public_html/data/naver-blog/test_file.csv"
    csvfile = open(savefilename, "w")
    wr = csv.writer(csvfile, dialect="excel")
    [wr.writerow(r) for r in table]
def main() -> None:
    """Render the static gh-pages course catalogue from cached crawl data.

    Reads the current semester and per-regulation module caches from
    ``cache/``, writes an index page plus one HTML page per degree
    regulation (and the shared JS/CSS assets) into ``gh-pages/``.
    """
    prefix   = "cache/"
    now      = datetime.datetime.today()
    time_ym  = now.strftime("%Y-%m")
    time_dmy = now.strftime("%d. %b %Y")
    semester = utils.json_read(prefix + "current_semester.json", None)
    # NOTE(review): if the cache file is missing json_read returns the None
    # default and the next line raises — presumably the crawl must run first.
    semester = semester[0] +" "+ semester[1]
    folder   = "gh-pages/"

    pflicht: List[Tuple[str, str]] = []
    fields: Dict[str, Dict[str, Tuple[str, str]]] = {}
    pflicht = utils.json_read(prefix + "pre-tucan-pflicht.json", pflicht)
    fields = utils.json_read(prefix + "pre-inferno.json", fields)

    #nebenfach = utils.json_read("nebenfach.json")
#    back = utils.groupby(((course, major +" · "+ category)
#            for major,v in nebenfach.items()
#            for category,v in v.items()
#            for module in v
#            for course in module), key=lambda x:x[0])
#    back = {k:["Y Nebenfach · " + " &<br> ".join(i[1] for i in v),""] for k,v in back}
#    fields = [back] + list(fields.values())
#    print(json.dumps(fields, indent=2))

    # dist/main.js with npm; code.orig.js without npm
    if os.path.exists("dist/main.js"):
        CODE_FILE = "dist/main.js"
    else:
        CODE_FILE = "code.orig.js"

    page_tmpl  = utils.file_read("page.html")
    index_tmpl = utils.file_read("index.html")
    code_tmpl  = utils.file_read(CODE_FILE)
    style_tmpl = utils.file_read("style.css")

    def filename(reg: str) -> str:
        """Reduce a regulation name to a safe file name (alphanumerics only)."""
        return "".join(c for c in reg if c.isalnum())

    # (regulation key, display name, output file); only the 2015 regulations
    # plus the hard-coded FB 13 entry are rendered.
    regulations = [
      (k,
       k.replace("B.Sc.", "Bachelor")
        .replace("M.Sc.", "Master")
        .replace(" (2015)", ""),
       filename(k) + ".html")
      for k in fields.keys()
      if k.endswith(" (2015)")
     ] + [
      # other FBs?
      ("BauUmwelt", "FB 13 Bau, Umwelt", "BauUmwelt.html")
    ]

    # Partition the index links: CS regulations, experimental (non-CS), and
    # special faculty-wide ("FB ...") pages.
    listy = [
      {'href': href, 'title': semester +" "+ display_regulation}
      for regulation, display_regulation, href in regulations
      if display_regulation.endswith(" Informatik")
      if not display_regulation.startswith("FB ")
    ]
    experimentallist = [
      {'href': href, 'title': semester +" "+ display_regulation}
      for regulation, display_regulation, href in regulations
      if not display_regulation.endswith(" Informatik")
      if not display_regulation.startswith("FB ")
    ]
    speciallist = [
      {'href': href, 'title': semester +" "+ display_regulation}
      for regulation, display_regulation, href in regulations
      if display_regulation.startswith("FB ")
    ]
    index_data = {
      "list": listy,
      "experimentallist": experimentallist,
      "speciallist": speciallist,
    }
    utils.file_write(folder + "/index.html", stache(index_tmpl, index_data))
    utils.file_write(folder + "/main.js", code_tmpl)
    utils.file_write(folder + "/style.css", style_tmpl)

    print(regulations)
    for regulation, display_regulation, href in regulations:
        print(prefix + "-" + filename(regulation) + ".json")
        modules: Dict[str, Module] = {}
        modules = utils.json_read(prefix + "-" + filename(regulation) + ".json", modules)
        # BUG FIX: the original `modules == []` compared a dict to a list and
        # was always False, so missing cache files were never skipped.
        if not modules: continue # if file exists

        data = [clean(module_id, module, fields, regulation)
                for module_id, module in modules.items()]

        data.sort(key=lambda x: (x['category'], x['id'])) # -int(x['credits'])
        js_data = json.dumps(data, indent=1)

        page_data = {
          "today":      time_dmy,
          "semester":   semester,
          "regulation": display_regulation,
          "js_data":    js_data,
          "content":    generate_page(data)
        }
        utils.file_write(folder + "/" + href, stache(page_tmpl, page_data))

    print("finished")
Exemple #40
0
import csv

from datetime import datetime, timedelta

from utils import checkdir, file_read, get_today, get_version

if __name__ == '__main__':

    num_directory_seq = 31
    basedir = '/home/web/public_html/data/naver-blog'
    seconddir = 'statistics'
    targetpath = '%s/%s' % (basedir, seconddir)
    filenames = glob.glob('%s/*.json' % targetpath)
    cnt_files = len(filenames)
    table = [[0 for i in range(cnt_files)]
             for j in range(num_directory_seq + 2)]
    for i, filename in enumerate(filenames):
        items = file_read(filename)
        print filename
        table[0][i] = filename.rsplit("statistics/", 1)[1].rsplit(".", 1)[0]
        for directory_seq in range(len(items)):
            table[directory_seq +
                  1][i] = items[directory_seq]['countTextsBlog']
            table[num_directory_seq +
                  1][i] = table[num_directory_seq +
                                1][i] + table[directory_seq + 1][i]
    savefilename = '/home/web/public_html/data/naver-blog/test_file.csv'
    csvfile = open(savefilename, 'w')
    wr = csv.writer(csvfile, dialect='excel')
    [wr.writerow(r) for r in table]