def get(self, key, **kwargs):
    info = {}
    recursive = kwargs.get("recursive", False)
    rsp = self.client.read(key, **kwargs)
    for child in rsp.get_subtree():
        if pathsplit(child.key)[0] != key:  # skip children's attrs
            continue
        if recursive and child.dir:
            # sub children, make a dict
            for c in child.children:
                root, attr = pathsplit(c.key)
                basename, id = split_letters(pathsplit(root)[1])
                if root == child.key and basename == attr:
                    # only match the main info
                    attrs = "%sS" % attr
                    info.setdefault(attrs, {})[id] = json.loads(c.value)
            continue
        else:
            try:
                value = json.loads(child.value)
            except TypeError:
                if recursive:
                    raise TypeError("%s is a directory" % child.key)
                else:
                    continue
            _, attr = pathsplit(child.key)
            info[attr] = value
    return info
def test_divide_train_leaderboard():
    sys.path.append('..')
    import pydream2015
    indir = join(pathsplit(pydream2015.__file__)[0], 'test_input')
    outdir = join(pathsplit(pydream2015.__file__)[0], 'test_output')
    indir = os.path.abspath(indir)
    outdir = os.path.abspath(outdir)
    pydream2015.initdatapath(indir, outdir)
    trainids, testids = pydream2015.util.divide_train_leaderboard()
def create_generator(folder, batch_size=2): x_dir = join(folder, "x") y_dir = join(folder, "y") assert exists(x_dir) is True assert exists(y_dir) is True # FIX: glob.glob is waaaaay faster than [f for f in listdir() if isfile(f)] x_files = glob.glob(join(x_dir, "*.tif")) + glob.glob( join(x_dir, "*.tiff")) y_files = glob.glob(join(y_dir, "*.tif")) + glob.glob( join(y_dir, "*.tiff")) assert len(x_files) == len(y_files) # Number of files nbr_files = len(x_files) # Let's begin the training/validation with the first file index = 0 while True: x, y = list(), list() for i in range(batch_size): # Get a new index index = (index + 1) % nbr_files # MUST be true (files must have the same name) assert pathsplit(x_files[index])[-1] == pathsplit( y_files[index])[-1] x_img = img_to_array(load_img(x_files[index])) y_img = img_to_array(load_img(y_files[index])) # Resize each image x_img, y_img = imresize(x_img, self.input_shape[:2]), imresize( y_img, self.input_shape[:2]) # Apply a transformation on these images # x_img, y_img = transfromXY(x_img, y_img) # Change y shape : (m, n, 3) -> (m, n, 2) (2 is the class number) temp_y_img = np.zeros(self.input_shape[:2] + (1, )) temp_y_img[y_img[:, :, 0] == 0] = 0 temp_y_img[y_img[:, :, 0] == 255] = 1 y_img = temp_y_img # Convert to float x_img = x_img.astype('float32') y_img = y_img.astype('float32') # Divide by the maximum value of each pixel x_img /= 255 # Append images to the lists x.append(x_img) y.append(y_img) yield np.array(x), np.array(y)
def createGenerator(dir, batch_size=2): x_dir = join(dir, "x") y_dir = join(dir, "y") assert exists(x_dir) == True assert exists(y_dir) == True # FIX: glob.glob is waaaaay faster than [f for f in listdir() if isfile(f)] x_files = glob.glob(join(x_dir, "*.jpg")) + glob.glob( join(x_dir, "*.png")) y_files = glob.glob(join(y_dir, "*.jpg")) + glob.glob( join(y_dir, "*.png")) assert len(x_files) == len(y_files) while True: x, y = list(), list() for _ in range(batch_size): # Get a random index between 0 and len(x_files) index = randint(0, len(x_files) - 1) # MUST be true (files must have the same name) assert pathsplit(x_files[index])[-1] == pathsplit( y_files[index])[-1] x_img = img_to_array(load_img(x_files[index])) y_img = img_to_array(load_img(y_files[index])) # Resize each image x_img, y_img = imresize(x_img, self.input_shape[:2]), imresize( y_img, self.input_shape[:2]) # Apply a transformation on these images x_img, y_img = transfromXY(x_img, y_img) # Change y shape : (m, n, 3) -> (m, n, 2) (2 is the class number) temp_y_img = np.zeros(self.input_shape[:2] + (self.__nClasses, )) temp_y_img[y_img[:, :, 1] != 255] = 0 temp_y_img[y_img[:, :, 1] == 255] = 1 y_img = temp_y_img # Convert to float x_img = x_img.astype('float32') y_img = y_img.astype('float32') # Divide by the maximum value of each pixel x_img /= 255 # Append images to the lists x.append(x_img) y.append(y_img) yield np.array(x), np.array(y)
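Both generators above reference a transfromXY() helper that is not included in this section; a minimal sketch of what such a paired augmentation could look like (the flip-based implementation is an assumption, not the original helper):

import numpy as np

def transfromXY(x_img, y_img):
    # Apply the same random flips to the image and its mask so the
    # pixel-wise labels stay aligned (assumed implementation).
    if np.random.rand() < 0.5:
        x_img, y_img = np.fliplr(x_img), np.fliplr(y_img)
    if np.random.rand() < 0.5:
        x_img, y_img = np.flipud(x_img), np.flipud(y_img)
    return x_img, y_img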
def samples(self):
    """
    Yield labeled samples from txt files. Labels are the parent
    directory of the file.
    """
    # this works in this case because we have one sample per file
    n = 0
    n_samples = len(self.datafiles)
    for datafile in self.datafiles:
        label = pathsplit(dirname(datafile))[1]
        split = pathsplit(datafile)[0].split('/')[-2]
        with open(datafile, 'r') as f:
            sample = f.read()
        yield self.sample_class(sample, label, self.dataset, split)
def download_file(self, filename, file_size):
    if not self.connection:
        return None
    valid_filename = pathsplit(filename)[1]
    bytes_left = int(file_size)
    byteat = self.byteat
    if not valid_filename:
        return False
    print "Start receiving the file {} bytes".format(file_size),
    with open(join(UPLOAD_DIR, valid_filename), 'wb') as file:
        while bytes_left > 0:
            # Check how many bytes are left.
            if byteat > bytes_left:
                byteat = bytes_left
            data = self.connection.recv(byteat)
            if not data:  # peer closed the connection early
                break
            file.write(data)
            bytes_left -= len(data)  # recv() may return fewer bytes than requested
            # print ".",
    print "done."
    return True
def _prep_file_under_git(path, filename): """Get instance of the repository for the given filename Helper to be used by few functions """ if filename is None: # path provides the path and the name path, filename = pathsplit(path) try: # if succeeds when must not (not `annexed`) -- fail repo = get_repo_instance(path, class_=AnnexRepo) annex = True except RuntimeError as e: # TODO: make a dedicated Exception if "No annex repository found in" in str(e): repo = get_repo_instance(path, class_=GitRepo) annex = False else: raise # path to the file within the repository # repo.path is a "realpath" so to get relpath working correctly # we need to realpath our path as well path = op.realpath(path) # intentional realpath to match GitRepo behavior file_repo_dir = op.relpath(path, repo.path) file_repo_path = filename if file_repo_dir == curdir else opj( file_repo_dir, filename) return annex, file_repo_path, filename, path, repo
def make_joblist(self): """Make a list of jobs with all variants for all infiles and create outfile directories. The in/out mapping is file.* > outdir/variant_name/provider/file.mp4.""" def get_task_lock_file(out_filename): "Get task-lock filename." return "%s.X" % splitext(out_filename)[0] def get_logfile(out_filename): "Get logfile name" return "%s.log" % splitext(out_filename)[0] for infile in self.infiles: if not os.path.exists(infile): print "Warning: infile %s does not exist. Skipping it" % infile continue infile_base = splitext(pathsplit(infile)[1])[0] for variant in self.config: outdir = normpath(pathjoin(self.outdir, infile_base)) if not os.path.exists(outdir): os.makedirs(outdir) outfile = pathjoin(outdir, variant['name'] + '.mp4') taskfile = get_task_lock_file(outfile) logfile = get_logfile(outfile) if os.path.exists(taskfile) or (not os.path.exists(outfile)): job = {'inFile' : infile, 'outFile' : outfile, 'lockFile' : taskfile, 'get_logfile' : logfile} job.update(variant) self.jobs.append(job) if len(self.jobs) == self.max_jobs: break
def main(): nei = NearestNeighbors() nei.fit(matrix) path = '{0}/'.format(pathsplit(abspath(__file__))[0]) jsonfile = open(path + '{1}_rand-{0}-nn.json'.format(n_neighbors, name), 'w') nodes = [{'name': i, 'group': groups[i]} for i in range(len(matrix))] links = [] for i in range(len(matrix)): dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({ "source": i, "target": idnei[j + 1], "value": 10 * (1 - dist[j + 1]) }) jsondumped = json.dumps({'nodes': nodes, 'links': links}, indent=2) jsonfile.write(jsondumped)
def _args(model, kwargs, create_parent=False): path = kwargs.pop('path', None) if path: try: user = kwargs['user'] except KeyError: raise ValueError('`user` argument required with `path`') parent, name = pathsplit(path.lstrip('/')) parent = parent if parent else '' if name == '' and parent == '': # If both are empty, caller is asking for '/' or similar, which # cannot be a file. raise UserFile.DoesNotExist() elif parent == '': # If only parent is empty, caller wants a file within root. parent = UserDir.objects.get_root(user) else: # If neither are empty, caller wants a file with a directory. try: parent = UserDir.objects.get(user=user, path=parent) except UserDir.DoesNotExist: # Caller may want us to create parent dirs (-p). For # example, during create(). if not create_parent: # If not, raise. raise UserFile.DoesNotExist() parent = UserDir.objects.create(user=user, path=parent) # The caller provided a valid path consisting of a parent directory # and a name. Set kwargs for the query. kwargs['name'] = name kwargs['parent'] = parent
def main(): vectorizer = CountVectorizer(ngram_range=(1,2),max_df=1.0, min_df=0.0) nei = NearestNeighbors(algorithm='brute', metric='jaccard') matrix = vectorizer.fit_transform(training_set).todense() new_matrix = vectorizer.transform(new_comments).todense() nei.fit(matrix) path = '{0}/'.format(pathsplit(abspath(__file__))[0]) jsonfile = open(path + '{0}-nn.json'.format(n_neighbors), 'w') nodes = [{'name': (training_set+new_comments)[i], 'group':(groups + new_groups)[i]} for i in range(len(training_set+new_comments))] links = [] for i in range(len(matrix)): dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({"source":i,"target":idnei[j+1],"value":10*(1 - dist[j+1])}) for i in range(len(new_comments)): dist, idnei = nei.kneighbors(new_matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({"source":len(matrix) + i,"target":idnei[j],"value":10*(1 - dist[j+1])}) jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2) jsonfile.write(jsondumped)
def datafile(filenames=None):
    if filenames is None:
        options = {
            'defaultextension': '.WVF',
            'filetypes': [('Yokogawa Data File', ('*.WDF', '*.WVF')),
                          ('Hierarchical Data Format (HDF5)', '.hdf5')],
            'initialdir': str(Path.home()),
            'title': 'Pick WVF or HDF5 files to load'
        }
        filenames = tkWindow(askopenfilenames, options)
    if isinstance(filenames, str):
        filenames = [filenames]
    # splitext() keeps the leading dot, so compare against '.wvf'/'.wdf'
    if any([(splitext(filename)[1].lower() == '.wvf') |
            (splitext(filename)[1].lower() == '.wdf')
            for filename in filenames]):
        [
            wdf2wvf(filename) for filename in filenames
            if splitext(filename)[1].lower() == '.wdf'
        ]
        return IndexableDict({
            pathsplit(filename)[1].split('.')[0]: DataFile(filename)
            for filename in filenames
        })
    if ('hdf' in splitext(filenames[0])[1].lower()) | (
            'h5' in splitext(filenames[0])[1].lower()):
        return read_hdf5(filenames)
def files_different(src, dest):
    from os.path import isfile, split as pathsplit, getsize, isdir
    srcparent, destparent = pathsplit(src)[0], pathsplit(dest)[0]
    # TODO: mtime?
    if not isdir(srcparent) or not isdir(destparent):
        return True
    if not isfile(src) or not isfile(dest):
        return True
    if getsize(src) != getsize(dest):
        return True
    if md5_file(src) != md5_file(dest):
        return True
    return False
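files_different() relies on an md5_file() helper that is not shown here; a plausible chunked implementation, offered as an assumption rather than the original code:

import hashlib

def md5_file(path, chunk_size=65536):
    # Hash the file in fixed-size chunks so large files do not need to
    # be read into memory at once (assumed helper, not from the source).
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()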
def current(self):
    try:
        l = readlink(self.link)
        prefix = self.name + '.'
        return pathsplit(l)[1].split(prefix, 2)[1]
    except (OSError, IndexError):
        # link missing/unreadable, or the basename does not carry the prefix
        pass
    return None
def test_divide_combi():
    import pydream2015
    indir = join(pathsplit(pydream2015.__file__)[0], 'test_input')
    outdir = join(pathsplit(pydream2015.__file__)[0], 'test_output')
    indir = os.path.abspath(indir)
    outdir = os.path.abspath(outdir)
    pydream2015.initdatapath(indir, outdir)
    therapy_traindata = pd.read_csv(pydream2015.DATA_COMBITHERAPY)
    trainids, testids = pydream2015.util.divide_combi(therapy_traindata,
                                                      ratio=0.3)
    print(len(trainids))
    print(len(testids))
def parts(path): """ Split a path by the path separator (/) run doctests with the command: python -m doctest -v randdiff.py >>> parts('/') ('/',) >>> parts('2011') ('2011',) >>> parts('2011/') ('2011',) >>> parts('/2011') ('/', '2011') >>> parts('a/b') ('a', 'b') >>> parts('a/b/') ('a', 'b') >>> parts('/a/b') ('/', 'a', 'b') >>> parts('/a/b/') ('/', 'a', 'b') >>> pathjoin(*(parts('/'))) '/' >>> pathjoin(*(parts('2011'))) '2011' >>> pathjoin(*(parts('2011/'))) '2011' >>> pathjoin(*(parts('/2011'))) '/2011' >>> pathjoin(*(parts('a/b'))) 'a/b' >>> pathjoin(*(parts('a/b/'))) 'a/b' >>> pathjoin(*(parts('/a/b'))) '/a/b' >>> pathjoin(*(parts('/a/b/'))) '/a/b' >>> """ (direc, base) = pathsplit(path) if direc == '': return (base,) if direc == '/': direc = (direc,) else: direc = parts(direc) if base == '': return direc else: return direc + (base,)
def pathcomponents(path):
    '''
    Given a path of '/'-separated components, return the components.
    This excludes empty components excepting one at the end if the
    path ends with '/'.
    '''
    prefix, component = pathsplit(path)
    if prefix not in ['', '/']:  # Fixed point
        return pathcomponents(prefix) + [component]
    else:
        return [component]
def _getcaller(self):
    '''
    Grab the name, filename, and line number of the function that
    created this Timer.
    '''
    f = sys._getframe(2)
    caller_name = f.f_code.co_name
    filename = pathsplit(f.f_code.co_filename)[-1]
    linenumber = f.f_code.co_firstlineno
    self.called_from = '%s:%s:%s' % (filename, caller_name, linenumber)
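A self-contained illustration of the frame introspection used by _getcaller(); the function names and frame depth here are made up for the example:

import sys
from os.path import split as pathsplit

def inner():
    f = sys._getframe(1)  # one frame up: the caller of inner()
    return '%s:%s:%s' % (pathsplit(f.f_code.co_filename)[-1],
                         f.f_code.co_name, f.f_code.co_firstlineno)

def outer():
    return inner()

print(outer())  # e.g. 'example.py:outer:10'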
def smooth_mutation_NCI(nci, sig, alp, mat, vec, smoothed,
                        overwrite=False, test=False):
    if exists(smoothed) and not overwrite:
        return
    result_dir = pathsplit(smoothed)[0]
    smooth_mutation_NCI_pre(nci, sig, mat, vec, test=test)
    print '[smoothing]'
    print '>>', smoothed
    run_smoother(mat, vec, smoothed, alpha=alp, ncores=4, test=test)
def _build_attachment(self, full_path):
    file_name = pathsplit(full_path)[1]
    with open(full_path, "rb") as attachment:
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(attachment.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition',
                        'attachment; filename= {}'.format(file_name))
    return part
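A hedged usage sketch for the attachment builder above, assuming `mailer` is an instance of the class that defines _build_attachment(); the addresses and file path are placeholders:

from email.mime.multipart import MIMEMultipart

msg = MIMEMultipart()
msg['Subject'] = 'Nightly report'
msg['From'] = 'sender@example.com'
msg['To'] = 'recipient@example.com'
msg.attach(mailer._build_attachment('/tmp/report.pdf'))
raw = msg.as_string()  # ready to hand to smtplib.SMTP.sendmail()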
def _move_file(self, file, dst):
    if self.isdir(dst):
        raise DirectoryConflictError(dst)
    dst, file.name = pathsplit(dst.lstrip('/'))
    if dst:
        try:
            file.parent = \
                UserDir.objects.get(path=dst, user=self.user)
        except UserDir.DoesNotExist:
            raise DirectoryNotFoundError(dst)
    file.save(update_fields=['parent', 'name'])
    return file
def save_file_cache(obj, data, user=False):
    if not getattr(obj, '_disk_cacheable', True):
        return
    cache_path = get_obj_cache_path(obj, user)
    # Ensure that the location for the cache file exists.
    cache_head = pathsplit(cache_path)[0]
    if not pathexists(cache_head):
        os.makedirs(cache_head)
    # Pickle, compress, and write out.
    with file(cache_path, 'wb') as f:
        f.write(data)
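save_file_cache() only covers the write path; a possible read-side counterpart, sketched under the assumption that the same get_obj_cache_path() and pathexists() helpers are available (this is not part of the original source):

def load_file_cache(obj, user=False):
    # Return the raw cached bytes, or None if no cache file exists yet.
    cache_path = get_obj_cache_path(obj, user)
    if not pathexists(cache_path):
        return None
    with open(cache_path, 'rb') as f:
        return f.read()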
def get_metadata(self, path): sidecarJSON = path.replace(".nii.gz", ".json").replace(".nii", ".json") path_components = pathsplit(sidecarJSON) filename_components = path_components[-1].split("_") ses = None suffix = filename_components[-1] sub = filename_components[0] keyword_components = filename_components[1:-1] if filename_components[1][:3] == "ses": ses = filename_components[1] keyword_components = filename_components[2:-1] potentialJSONs = [] for prefixes, midlayer, conditional in ( # Levels (tuple(), tuple(), True), # top ((sub, ), tuple(), True), # subject ((sub, ), (pathsplit(path_components[-2])[-1], ), True), ((sub, ses), tuple(), ses), # session ((sub, ses), (pathsplit(path_components[-2])[-1], ), ses)): if not conditional: continue for k in range(len(keyword_components) + 1): for components in combinations(keyword_components, k): potentialJSONs.append( pathjoin( self.root, *(prefixes + midlayer + ("_".join(prefixes + components + (suffix, )), )))) merged_param_dict = {} for json_file_path in potentialJSONs: if os.path.exists(json_file_path): param_dict = json.load(open(json_file_path, "r")) merged_param_dict.update(param_dict) return merged_param_dict
def run(cmd, verbose=False, dry_run=False):
    """
    Run system command.
    """
    LANGUAGE = "en"
    # Show "live" command output in verbose mode
    command_name = pathsplit(abspath(cmd.strip().split(' ')[0]))[1]
    command = "env LANGUAGE=%s %s" % (LANGUAGE, cmd)
    if verbose:
        print " INFO - Run `%s`..." % command
    # Debug mode or not, we print a nice formatted output of the command
    if not dry_run:
        result = getstatusoutput(command)
        nice_log(log=result[1], cmd_name=command_name)
def __init__(self, json_path):
    with open(json_path, 'r', encoding='utf-8') as f:
        unicode_safe = f.read()
    self.json = json.loads(unicode_safe)
    self.info = self.json['info']
    self.images = self.json['images']
    try:
        self.options = self.json['options']
    except KeyError:
        self.options = []
    self.all_files = []  # Dicts are not a hashable type, so they need a list
    self.all_filenames = set()
    self.new_files = []
    for i in self.images:
        try:
            floppy_files = i['floppy']['files']
            for ff in floppy_files:
                try:
                    _ = ff['new_file']
                    self.new_files.append(ff)  # was `hf`, which is undefined here
                except KeyError:
                    self.all_files.append(ff)
                    self.all_filenames.add(ff['name'])
        except KeyError:
            hdd_files = i['hdd']['files']
            for hf in hdd_files:
                try:
                    _ = hf['new_file']
                    self.new_files.append(hf)
                except KeyError:
                    self.all_files.append(hf)
                    self.all_filenames.add(hf['name'])
    self.all_filenames = list(self.all_filenames)
    self.patch_dir = pathjoin(pathsplit(json_path)[0], 'patch')
    # TODO: What other stuff do I need easier access to?
    if not self._validate_config():
        # TODO: Need a more specific message of what is wrong with it.
        message_wait_close(
            "A config option in %s is not supported by this version of "
            "Pachy98. Download a newer version." % selected_config)
    try:
        self._validate_patch_existence()
    except FileNotFoundError as e:
        message_wait_close(
            "This config references a patch %s that doesn't exist." % e)
def _uri_to_name(uri):
    """
    Converts a uri to a name or key by only taking everything after
    the last / or (if present) #.

    Examples:
    - http://example.com/test -> test
    - http://example.com/model#testedBy -> testedBy
    """
    usplit = urlsplit(uri)
    if usplit.fragment != '':
        return usplit.fragment
    else:
        return pathsplit(usplit.path)[-1]
def splitall(path):
    allparts = []
    while 1:
        parts = pathsplit(path)
        if parts[0] == path:  # sentinel for absolute paths
            allparts.insert(0, parts[0])
            break
        elif parts[1] == path:  # sentinel for relative paths
            allparts.insert(0, parts[1])
            break
        else:
            path = parts[0]
            allparts.insert(0, parts[1])
    return allparts
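For reference, a short sanity check of splitall(), assuming pathsplit is os.path.split on a POSIX system; the example paths are illustrative:

print(splitall('/a/b/c.txt'))  # ['/', 'a', 'b', 'c.txt']
print(splitall('a/b/'))        # ['a', 'b', '']
print(splitall('/'))           # ['/']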
def url_sibling(url, sibling):
    """
    Replaces the last path element in an url

    Parameters
    ----------
    url : str
        The url for which the last path element should be replaced
    sibling : str
        The replace value
    """
    parsed = urlparse(url)
    newpath_segments = pathsplit(parsed.path)[:-1] + (sibling,)
    newpath = pathjoin(*newpath_segments)
    return urlunparse(list(parsed[:2]) + [newpath] + list(parsed[3:]))
def __init__(self, filename=None, verbose=False):
    if filename is None:
        return
    else:
        self.filename = filename
    file_info, trace_info_array = self.read_hdr()
    [setattr(self, k, v) for k, v in file_info.items()]
    self.traces = IndexableDict({})
    for info in trace_info_array:
        self.traces[info['name']] = Trace(
            {key: value for key, value in zip(info.dtype.names, info)}, self)
    self.date = file_info['date']
    self.name = pathsplit(filename)[1].split('.')[0]
def batchunit(inpfile, outfile, with_small=False, overwrite=False):
    jsonstr = open(inpfile).readlines()
    jsonstr = "\n".join(jsonstr)
    hash_object = hashlib.md5(jsonstr.encode())
    _cwd = os.getcwd()
    inpfile = join(_cwd, inpfile)
    outfile = join(_cwd, outfile)
    workdir = join('datafiles', hash_object.hexdigest())
    if not exists(workdir):
        os.makedirs(workdir)
    os.chdir(workdir)
    mixedmodelmain(inpfile, outfile, with_small, overwrite)
    tgzdata = pathsplit(outfile)[1].split('.')[0] + '_data.tar.gz'
    os.system('tar -cvzf ../../%s *' % tgzdata)
def mailinglist_analyse(resdir, mldir, codeface_conf, project_conf, loglevel,
                        logfile, jobs, mailinglists, use_corpus):
    conf = Configuration.load(codeface_conf, project_conf)
    ml_resdir = pathjoin(resdir, conf["project"], "ml")

    exe = abspath(resource_filename(__name__, "R/ml/batch.r"))
    cwd, _ = pathsplit(exe)
    cmd = []
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(jobs)))
    if use_corpus:
        cmd.append("--use-corpus")
    cmd.append(ml_resdir)
    cmd.append(mldir)

    if not mailinglists:
        mailinglist_conf = conf["mailinglists"]
    else:
        mailinglist_conf = []
        for mln in mailinglists:
            match = [ml for ml in conf["mailinglists"] if ml["name"] == mln]
            if not match:
                # report the requested name (mln), not the comprehension variable
                log.fatal("Mailinglist '{}' not listed in configuration file!".
                          format(mln))
                raise Exception("Unknown mailing list")
            if len(match) > 1:
                log.fatal(
                    "Mailinglist '{}' specified twice in configuration file!".
                    format(mln))
                raise Exception("Invalid config file")
            mailinglist_conf.append(match[0])

    for i, ml in enumerate(mailinglist_conf):
        log.info(
            "=> Analysing mailing list '{name}' of type '{type}'".format(**ml))
        logargs = []
        if logfile:
            logargs = ["--logfile", "{}.R.ml.{}".format(logfile, i)]
        execute_command([exe] + logargs + cmd + [ml["name"]],
                        direct_io=True, cwd=cwd)

    log.info("=> Codeface mailing list analysis complete!")
def sociotechnical_analyse(resdir, codeface_conf, project_conf, loglevel, logfile, n_jobs): conf = Configuration.load(codeface_conf, project_conf) project_resdir = pathjoin(resdir, conf["project"]) exe = abspath(resource_filename(__name__, "R/sociotechnical.r")) cwd, _ = pathsplit(exe) cmd = [exe] if logfile: cmd.extend(("--logfile", "{}.R.sociotechnical".format(logfile))) cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(n_jobs))) cmd.append(project_resdir) log.info("=> Performing socio-technical analysis") execute_command(cmd, direct_io=True, cwd=cwd) generate_report_st(pathjoin(resdir, conf["project"], "st")) log.info("=> Codeface socio-technical analysis complete!")
def mailinglist_analyse(resdir, mldir, codeface_conf, project_conf, loglevel, logfile, jobs, mailinglists, use_corpus): conf = Configuration.load(codeface_conf, project_conf) ml_resdir = pathjoin(resdir, conf["project"], "ml") exe = abspath(resource_filename(__name__, "R/ml/batch.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(jobs))) if (use_corpus): cmd.append("--use-corpus") cmd.append(ml_resdir) cmd.append(mldir) if not mailinglists: mailinglist_conf = conf["mailinglists"] else: mailinglist_conf = [] for mln in mailinglists: match = [ml for ml in conf["mailinglists"] if ml["name"] == mln] if not match: log.fatal("Mailinglist '{}' not listed in configuration file!". format(ml)) raise Exception("Unknown mailing list") if len(match) > 1: log.fatal("Mailinglist '{}' specified twice in configuration file!". format(ml)) raise Exception("Invalid config file") mailinglist_conf.append(match[0]) for i, ml in enumerate(mailinglist_conf): log.info("=> Analysing mailing list '{name}' of type '{type}'". format(**ml)) logargs = [] if logfile: logargs = ["--logfile", "{}.R.ml.{}".format(logfile, i)] execute_command([exe] + logargs + cmd + [ml["name"]], direct_io=True, cwd=cwd) log.info("=> Codeface mailing list analysis complete!")
def main(): vectorizer = CountVectorizer(ngram_range=(1, 2), max_df=1.0, min_df=0.0) nei = NearestNeighbors(algorithm='brute', metric='jaccard') matrix = vectorizer.fit_transform(training_set).todense() new_matrix = vectorizer.transform(new_comments).todense() nei.fit(matrix) path = '{0}/'.format(pathsplit(abspath(__file__))[0]) jsonfile = open(path + '{0}-nn.json'.format(n_neighbors), 'w') nodes = [{ 'name': (training_set + new_comments)[i], 'group': (groups + new_groups)[i] } for i in range(len(training_set + new_comments))] links = [] for i in range(len(matrix)): dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({ "source": i, "target": idnei[j + 1], "value": 10 * (1 - dist[j + 1]) }) for i in range(len(new_comments)): dist, idnei = nei.kneighbors(new_matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({ "source": len(matrix) + i, "target": idnei[j], "value": 10 * (1 - dist[j + 1]) }) jsondumped = json.dumps({'nodes': nodes, 'links': links}, indent=2) jsonfile.write(jsondumped)
def main(): nei = NearestNeighbors(metric='euclidean') nei.fit(matrix) path = '{0}/'.format(pathsplit(abspath(__file__))[0]) jsonfile = open(path + '{1}_rand-{0}-nn.json'.format(n_neighbors, name), 'w') nodes = [{'name': i, 'group':groups[i]} for i in range(len(matrix))] links = [] for i in range(len(matrix)): dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1) dist, idnei = dist[0], idnei[0] for j in range(len(idnei[1:])): links.append({"source":i,"target":idnei[j+1],"value":10*(1 - dist[j+1])}) jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2) jsonfile.write(jsondumped)
def split_extra(self, path, extra_info='', nodir=1):
    """
    If given a path with extra path info, will return
    (path of existing file, extra path info) if some part of the path
    represents an existing file, and ('', '') if there is no file.
    If nodir is true, the file must not be a directory unless
    extra_info == '', or the return value will be ('', '').
    """
    if path == '':
        return ('', '')
    if self.exists(path):
        if nodir and self.isdir(path) and extra_info != '':
            return ('', '')
        return path, extra_info
    else:
        p, e = pathsplit(path)
        if extra_info:
            ex = '%s/%s' % (e, extra_info)
        else:
            ex = e
        return self.split_extra(p, ex, nodir)
def _prep_file_under_git(path, filename): """Get instance of the repository for the given filename Helper to be used by few functions """ if filename is None: # path provides the path and the name path, filename = pathsplit(path) try: # if succeeds when must not (not `annexed`) -- fail repo = get_repo_instance(path, class_=AnnexRepo) annex = True except RuntimeError as e: # TODO: make a dedicated Exception if "No annex repository found in" in str(e): repo = get_repo_instance(path, class_=GitRepo) annex = False else: raise # path to the file within the repository file_repo_dir = os.path.relpath(path, repo.path) file_repo_path = filename if file_repo_dir == curdir else opj(file_repo_dir, filename) return annex, file_repo_path, filename, path, repo
# it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from os.path import realpath, split as pathsplit from os.path import join as pathjoin DATA_FILE = realpath(pathjoin(pathsplit(__file__)[0], 'dorms.txt')) DATA_ENC = 'utf-8-sig' SEGTYPE_FIELD = u'segtype' # now the parsing of individual parts uses dynamic dispatch SEGTYPE_UNIFORM = u'uniform' SEGTYPE_SPECIAL = u'special' # lambdas are used to prevent scoping error SEGTYPE_DISPATCH = {SEGTYPE_UNIFORM: lambda a, b, c: parse_uniform(a, b, c), SEGTYPE_SPECIAL: lambda a, b, c: parse_special(a, b, c), } IPPREFIX_FIELD = u'ipprefix' IP_BEGIN_FIELD = u'ipsubstart' IP_STEP_FIELD = u'ipstep' APARTMENT_START_FIELD = u'apartmentstart'
def project_analyse(resdir, gitdir, codeface_conf, project_conf, no_report, loglevel, logfile, recreate, profile_r, n_jobs, tagging_type, reuse_db): pool = BatchJobPool(int(n_jobs)) conf = Configuration.load(codeface_conf, project_conf) tagging = conf["tagging"] if tagging_type is not "default": if not tagging_type in LinkType.get_all_link_types(): log.critical('Unsupported tagging mechanism specified!') raise ConfigurationError('Unsupported tagging mechanism.') # we override the configuration value if tagging is not tagging_type: log.warn( "tagging value is overwritten to {0} because of --tagging" .format(tagging_type)) tagging = tagging_type conf["tagging"] = tagging project = conf["project"] repo = pathjoin(gitdir, conf["repo"], ".git") project_resdir = pathjoin(resdir, project, tagging) range_by_date = False # When revisions are not provided by the configuration file # generate the analysis window automatically if len(conf["revisions"]) < 2: window_size_months = 3 # Window size in months num_window = -1 # Number of ranges to analyse, -1 captures all ranges revs, rcs = generate_analysis_windows(repo, window_size_months) conf["revisions"] = revs[-num_window-1:] conf["rcs"] = rcs[-num_window-1:] range_by_date = True # TODO: Sanity checks (ensure that git repo dir exists) if tagging == LinkType.proximity: check4ctags() elif tagging in (LinkType.feature, LinkType.feature_file): check4cppstats() project_id, dbm, all_range_ids = project_setup(conf, recreate) ## Save configuration file conf.write() project_conf = conf.get_conf_file_loc() # Analyse new revision ranges for i, range_id in enumerate(all_range_ids): start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id) range_resdir = pathjoin(project_resdir, "{0}-{1}". format(start_rev, end_rev)) prefix = " -> Revision range {0}..{1}: ".format(start_rev, end_rev) ####### # STAGE 1: Commit analysis s1 = pool.add( doProjectAnalysis, (conf, start_rev, end_rev, rc_rev, range_resdir, repo, reuse_db, True, range_by_date), startmsg=prefix + "Analysing commits...", endmsg=prefix + "Commit analysis done." ) ######### # STAGE 2: Cluster analysis exe = abspath(resource_filename(__name__, "R/cluster/persons.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe) cmd.extend(("--loglevel", loglevel)) if logfile: cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i))) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.append(range_resdir) cmd.append(str(range_id)) s2 = pool.add( execute_command, (cmd,), {"direct_io":True, "cwd":cwd}, deps=[s1], startmsg=prefix + "Detecting clusters...", endmsg=prefix + "Detecting clusters done." ) ######### # STAGE 3: Generate cluster graphs if not no_report: pool.add( generate_reports, (start_rev, end_rev, range_resdir), deps=[s2], startmsg=prefix + "Generating reports...", endmsg=prefix + "Report generation done." 
) # Wait until all batch jobs are finished pool.join() ######### # Global stage 1: Time series generation log.info("=> Preparing time series data") dispatch_ts_analysis(project_resdir, conf) ######### # Global stage 2: Complexity analysis ## NOTE: We rely on proper timestamps, so we can only run ## after time series generation log.info("=> Performing complexity analysis") for i, range_id in enumerate(all_range_ids): log.info(" -> Analysing range {}".format(range_id)) exe = abspath(resource_filename(__name__, "R/complexity.r")) cwd, _ = pathsplit(exe) cmd = [exe] if logfile: cmd.extend(("--logfile", "{}.R.complexity.{}".format(logfile, i))) cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(n_jobs))) cmd.append(repo) cmd.append(str(range_id)) execute_command(cmd, direct_io=True, cwd=cwd) ######### # Global stage 3: Time series analysis log.info("=> Analysing time series") exe = abspath(resource_filename(__name__, "R/analyse_ts.r")) cwd, _ = pathsplit(exe) cmd = [exe] if profile_r: cmd.append("--profile") if logfile: cmd.extend(("--logfile", "{}.R.ts".format(logfile))) cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(n_jobs))) cmd.append(project_resdir) execute_command(cmd, direct_io=True, cwd=cwd) log.info("=> Codeface run complete!")
def project_analyse(resdir, gitdir, codeface_conf, project_conf, no_report, loglevel, logfile, recreate, profile_r, n_jobs): pool = BatchJobPool(int(n_jobs)) conf = Configuration.load(codeface_conf, project_conf) project, tagging = conf["project"], conf["tagging"] repo = pathjoin(gitdir, conf["repo"], ".git") project_resdir = pathjoin(resdir, project, tagging) # When revisions are not provided by the configuration file # generate the analysis window automatically if len(conf["revisions"]) < 2: window_size_months = 3 # Window size in months conf["revisions"], conf["rcs"] = generate_analysis_windows(repo, window_size_months) # TODO: Sanity checks (ensure that git repo dir exists) if 'proximity' == conf["tagging"]: check4ctags() project_id, dbm, all_range_ids = project_setup(conf, recreate) ## Save configuration file conf.write() project_conf = conf.get_conf_file_loc() # Analyse new revision ranges for i, range_id in enumerate(all_range_ids): start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id) range_resdir = pathjoin(project_resdir, "{0}-{1}". format(start_rev, end_rev)) prefix = " -> Revision range {0}..{1}: ".format(start_rev, end_rev) ####### # STAGE 1: Commit analysis s1 = pool.add( doProjectAnalysis, (conf, start_rev, end_rev, rc_rev, range_resdir, repo, True, True), startmsg=prefix + "Analysing commits...", endmsg=prefix + "Commit analysis done." ) ######### # STAGE 2: Cluster analysis exe = abspath(resource_filename(__name__, "R/cluster/persons.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe) cmd.extend(("--loglevel", loglevel)) if logfile: cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i))) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.append(range_resdir) cmd.append(str(range_id)) s2 = pool.add( execute_command, (cmd,), {"direct_io":True, "cwd":cwd}, deps=[s1], startmsg=prefix + "Detecting clusters...", endmsg=prefix + "Detecting clusters done." ) ######### # STAGE 3: Generate cluster graphs if not no_report: pool.add( generate_reports, (start_rev, end_rev, range_resdir), deps=[s2], startmsg=prefix + "Generating reports...", endmsg=prefix + "Report generation done." 
) # Wait until all batch jobs are finished pool.join() ######### # Global stage 1: Time series generation log.info("=> Preparing time series data") dispatch_ts_analysis(project_resdir, conf) ######### # Global stage 2: Complexity analysis ## NOTE: We rely on proper timestamps, so we can only run ## after time series generation log.info("=> Performing complexity analysis") for i, range_id in enumerate(all_range_ids): log.info(" -> Analysing range {}".format(range_id)) exe = abspath(resource_filename(__name__, "R/complexity.r")) cwd, _ = pathsplit(exe) cmd = [exe] if logfile: cmd.extend(("--logfile", "{}.R.complexity.{}".format(logfile, i))) cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(n_jobs))) cmd.append(repo) cmd.append(str(range_id)) execute_command(cmd, direct_io=True, cwd=cwd) ######### # Global stage 3: Time series analysis log.info("=> Analysing time series") exe = abspath(resource_filename(__name__, "R/analyse_ts.r")) cwd, _ = pathsplit(exe) cmd = [exe] if profile_r: cmd.append("--profile") if logfile: cmd.extend(("--logfile", "{}.R.ts".format(logfile))) cmd.extend(("--loglevel", loglevel)) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.extend(("-j", str(n_jobs))) cmd.append(project_resdir) execute_command(cmd, direct_io=True, cwd=cwd) log.info("=> Prosoda run complete!")
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals, division

from os.path import realpath, split as pathsplit
from os.path import join as pathjoin
from cPickle import loads

from kbspasswd import kbs_encode

DATA_FILE = realpath(pathjoin(pathsplit(__file__)[0], 'PASSWDS.pickle'))


def build_dict():
    with open(DATA_FILE, 'rb') as fp:
        content = fp.read()
    return loads(content)


_dict = build_dict()


def chkpasswd(uid, psw):
    return _dict[uid] == kbs_encode(uid, psw)
def conway_analyse(resdir, gitdir, titandir, codeface_conf, project_conf, loglevel, logfile, jobs): conf = Configuration.load(codeface_conf, project_conf) log.info("Performing conway analysis") pool = BatchJobPool(int(jobs)) conf = Configuration.load(codeface_conf, project_conf) project = conf["project"] repo = pathjoin(gitdir, conf["repo"], ".git") project_resdir = pathjoin(resdir, project, "conway") range_by_date = False if conf["tagging"] != "proximity": log.error("Conway analysis requires analysis in proximity mode!") return # Set defaults for the various analysis choices if they are not explicitly # given in the configuration file if "artifactType" not in conf.keys(): conf["artifactType"] = "file" log.info("Conway analysis: No artefact type given, defaulting to 'file'") if "dependencyType" not in conf.keys(): conf["dependencyType"] = "none" log.info("Conway analysis: No dependency type given, defaulting to 'none'") if "qualityType" not in conf.keys(): conf["qualityType"] = "corrective" log.info("Conway analysis: No quality type given, defaulting to 'corrective'") if "communicationType" not in conf.keys(): conf["communicationType"] = "mail" log.info("Conway analysis: No communication type given, defaulting to 'mail'") if conf["communicationType"] == "jira" and \ (("issueTrackerType" in conf.keys() and conf["issueTrackerType"] != "jira") or \ not("issueTrackerType" in conf.keys())): log.info("Conway analysis configuration requires jira bugtracking information, exiting") return # When revisions are not provided by the configuration file # generate the analysis window automatically if len(conf["revisions"]) < 2: window_size_months, num_window = get_analysis_windows(conf) revs, rcs, dates = generate_analysis_windows(repo, window_size_months) conf["revisions"] = revs[-num_window-1:] conf["rcs"] = rcs[-num_window-1:] range_by_date = True project_id, dbm, all_range_ids = project_setup(conf, False) ## Save modified configuration file to a temporary location conf.write() project_conf = conf.get_conf_file_loc() # Global stage: Download and process JIRA issues if conf["communicationType"] == "jira": log.info("=> Downloading and processing JIRA issues") dispatch_jira_processing(project_resdir, titandir, conf) # Revision range specific analysis for i, range_id in enumerate(all_range_ids): start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id) start_date = dbm.get_commit_cdate(project_id, start_rev) end_date = dbm.get_commit_cdate(project_id, end_rev) range_resdir = gen_range_path(project_resdir, i+1, start_rev, end_rev) prefix = gen_prefix(i+1, len(all_range_ids), start_rev, end_rev) ####### # STAGE 1: s1 = pool.add( parseCommitLoC, (conf, dbm, project_id, range_id, start_rev, end_rev, range_resdir, repo), startmsg=prefix + "Computing file/developer relations...", endmsg=prefix + "Computing file/developer relations done." ) ######### # STAGE 2: Connect commits and jira issues if "communicationType" in conf.keys() and conf["communicationType"] == "jira": exe = abspath(resource_filename(__name__, "R/conway_metrics.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe) cmd.extend(("--loglevel", loglevel)) if logfile: cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i))) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.append(project_resdir) cmd.append(range_resdir) s2 = pool.add( execute_command, (cmd,), {"direct_io":True, "cwd":cwd}, deps=[s1], startmsg=prefix + "Connecting commits and issues...", endmsg=prefix + "Connecting commits and issues done." 
) ####### # STAGE 3: Obtain SDSM using Titan if "dependencyType" in conf.keys() and conf["dependencyType"] == "dsm": exe = abspath(resource_filename(__name__, "R/titan.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe) cmd.extend(("--loglevel", loglevel)) if logfile: cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i))) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.append(repo) cmd.append(range_resdir) cmd.append(titandir) cmd.append(end_rev) s3 = pool.add( execute_command, (cmd,), {"direct_io":True, "cwd":cwd}, deps=[], startmsg=prefix + "Inferring architectural metrics with Titan...", endmsg=prefix + "Titan run done." ) ######### # STAGE 4: Perform socio-technical analysis exe = abspath(resource_filename(__name__, "R/socio_technical_analysis.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe) cmd.extend(("--loglevel", loglevel)) if logfile: cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i))) cmd.extend(("-c", codeface_conf)) cmd.extend(("-p", project_conf)) cmd.append(project_resdir) cmd.append(range_resdir) cmd.append(start_date) cmd.append(end_date) if "dependencyType" in conf.keys() and conf["dependencyType"] == "dsm": deps=[s3] else: deps=[] s4 = pool.add( execute_command, (cmd,), {"direct_io":True, "cwd":cwd}, deps=deps, startmsg=prefix + "Performing socio-technical analysis...", endmsg=prefix + "Socio-technical analysis done." ) # Wait until all batch jobs are finished pool.join() log.info("=> Codeface conway analysis complete!")
def __enter__(self): ''' This function is called when entering a with statement. Here a real git repository is created and populated with the information in the project. ''' self.directory = mkdtemp(prefix="codeface_test_project") cwd = getcwd() try: chdir(self.directory) def git(cmds, committer=None, commitdate=None): if committer or commitdate: env = dict(environ) if committer: env["GIT_COMMITTER_NAME"] = committer.name env["GIT_COMMITTER_EMAIL"] = committer.email if commitdate: env["GIT_COMMITTER_DATE"] = commitdate check_call(["git"] + cmds, env=env) else: check_call(["git"] + cmds) git(["init"]) next_release = 0 next_rc = 0 release_tags = [] rc_tags = {} for i, c in enumerate(self._commits): # First, clear the directory for f in listdir("."): if f != ".git": if isdir(f): rmtree(f) else: unlink(f) # Insert the files specified in the commits filetree for f, content in c.filetree.iteritems(): dn, fn = pathsplit(f) if dn and not exists(dn): makedirs(dn) with file(f, "w") as fd: fd.write(content) # Perform the commit git("add -A .".split()) commitmsg = "Commit {}\n\nCommit message\n\n".format(i) for signer in c.signoff: commitmsg += "Signed-off-by: {}\n".format(str(signer)) git(["commit", "--author", str(c.author), "--date", c.datetime, "-m", commitmsg], committer=c.committer, commitdate=c.datetime) # Tag the commit for tag in c.tags: name = "v{}_{}".format(next_release, tag.type) if tag.type == "rc": name += "_{}".format(next_rc) rc_tags.setdefault(next_release, name) # do not overwrite first rc next_rc += 1 elif tag.type == "release": release_tags.append(name) next_release += 1 git(["tag", name]) # Create codeface test configuration configuration = dedent(""" --- project: {project} repo: {project} # Relative to git-dir as specified on the command line description: {project} Description mailinglists: - name: {project}.dev1 type: dev source: generated - name: {project}.dev2 type: dev source: generated - name: {project}.user1 type: user source: generated - name: {project}.user2 type: user source: generated revisions: {release_tags} rcs : {rctags} tagging: {tagging} """.format(release_tags=str(release_tags), tagging=self._tagging, rctags=str([rc_tags.get(i, release_tags[i]) for i in range(len(release_tags))]), project=basename(self.directory) ) ) with file(self.codeface_conf, "w") as fd: fd.write(configuration) for ml_name, ml_file in self.mboxes: with file(ml_file, "w") as fd: fd.write(self.mbox_contents(ml_name)) finally: chdir(cwd)
DEBUG_POSTFIX = '_d' if DEBUG else '' USE_DEVENV = not VS_EXPRESS # devenv.exe does not come with the free compiler makepath = lambda *p: normpath(pathjoin(abspath(p[0]), *p[1:])) if USE_PYTHON_26: # # Python 2.6 Trunk # PYTHON_SVN_REVISION = '72606' PYTHON_SVN_URL = '%s/python/branches/release26-maint@%s' % (PYTHON_PROJECTS_SVN, PYTHON_SVN_REVISION) PCBUILD_DIR = 'PCBuild' PYTHON_LIBDIR = normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR)) PYTHON_PGO_DIR = 'Win32-pgo' PYTHON_PGI_LIBDIR = normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR, 'Win32-pgi')) PYTHON_PGO_LIBDIR = normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR, PYTHON_PGO_DIR)) PYTHON_EXE = normpath(pathjoin(PYTHON_LIBDIR, r'python%s.exe' % DEBUG_POSTFIX)) PYTHON_EXE_PGI = normpath(pathjoin(PYTHON_PGI_LIBDIR, r'python.exe')) PYTHON_EXE_PGO = normpath(pathjoin(PYTHON_PGO_LIBDIR, r'python.exe')) PYTHON_VER = '26' PYTHON_BZIP = ('bzip2-1.0.5', '%s/external/bzip2-1.0.5' % PYTHON_PROJECTS_SVN) PYTHON_SQLITE = ('sqlite-3.5.9', '%s/external/sqlite-3.5.9/' % PYTHON_PROJECTS_SVN) else: # # Python 2.5 Maintenance Branch # PYTHON_SVN_REVISION = 'HEAD' PYTHON_SVN_URL = '%s/python/branches/release25-maint@%s' % (PYTHON_PROJECTS_SVN, PYTHON_SVN_REVISION)
#!/usr/bin/env python

from os import system, listdir
from os.path import join as pathjoin, split as pathsplit, dirname, abspath
from sys import path as pythonpath, argv, exit

# Establish some useful global variables.
#
# We assume that this program is located in the root directory of
# the instance of the pub package that we are testing. Given
# that assumption, we find everything else by building paths
# relative to that root.
#
pubdir = abspath(dirname(argv[0]))
PYTHONPATH = pathsplit(pubdir)[0]
pythonpath.insert(0, PYTHONPATH)
testdir = pathjoin(pubdir, 'test')
gamesdir = pathjoin(pubdir, 'games')


# This routine will run a particular test game and diff the output.
def runtest(name, gamepath, makeoutput):
    inputfile = pathjoin(testdir, '%s-input' % name)
    outputfile = pathjoin(testdir, '%s-output' % name)
    if makeoutput:
        testfile = outputfile
    else:
        testfile = pathjoin(testdir, '%s-testout' % name)
    system('PYTHONPATH=%s PUBTESTING=true python pubrun %s <%s >%s' %
           (PYTHONPATH, gamepath, inputfile, testfile))
    system('rm pub.dat')
    if makeoutput:
        return
    system("sed -e 's/0x.*c//' %s > diff1" % testfile)
    system("sed -e 's/0x.*c//' %s > diff2" % outputfile)