Example #1
 def get(self, key, **kwargs):
     info = {}
     recursive = kwargs.get("recursive", False)
     rsp = self.client.read(key, **kwargs)
     for child in rsp.get_subtree():
         if pathsplit(child.key)[0] != key:  # skip children's attrs
             continue
         if recursive and child.dir:  # sub children, make a dict
             for c in child.children:
                 root, attr = pathsplit(c.key)
                 basename, id = split_letters(pathsplit(root)[1])
                 if root == child.key and basename == attr:
                     # only match the main info
                     attrs = "%sS" % attr
                     info.setdefault(attrs, {})[id] = json.loads(c.value)
             continue
         else:
             try:
                 value = json.loads(child.value)
             except TypeError:
                 if recursive:
                     raise TypeError("%s is a directory" % child.key)
                 else:
                     continue
             _, attr = pathsplit(child.key)
             info[attr] = value
     return info
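Here `pathsplit` is `os.path.split`, so each flat key splits into a parent prefix and an attribute name. A minimal standalone sketch (hypothetical keys, not the client used above):

from os.path import split as pathsplit

# Hypothetical flat keys as a key-value store might return them.
keys = ["/hosts/web01/cpu", "/hosts/web01/mem", "/hosts/web01"]
for key in keys:
    root, attr = pathsplit(key)
    print(root, attr)   # e.g. /hosts/web01 cpu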
Example #2
def test_divide_train_leaderboard():
    sys.path.append('..')
    import pydream2015
    indir = join(pathsplit(pydream2015.__file__)[0], 'test_input')
    outdir = join(pathsplit(pydream2015.__file__)[0], 'test_output')
    indir = os.path.abspath(indir)
    outdir = os.path.abspath(outdir)
    pydream2015.initdatapath(indir, outdir)

    trainids, testids = pydream2015.util.divide_train_leaderboard()
Example #3
        def create_generator(folder, batch_size=2):
            x_dir = join(folder, "x")
            y_dir = join(folder, "y")

            assert exists(x_dir) is True
            assert exists(y_dir) is True

            # FIX: glob.glob is waaaaay faster than [f for f in listdir() if isfile(f)]
            x_files = glob.glob(join(x_dir, "*.tif")) + glob.glob(
                join(x_dir, "*.tiff"))
            y_files = glob.glob(join(y_dir, "*.tif")) + glob.glob(
                join(y_dir, "*.tiff"))

            assert len(x_files) == len(y_files)

            # Number of files
            nbr_files = len(x_files)
            # Let's begin the training/validation with the first file
            index = 0
            while True:
                x, y = list(), list()
                for i in range(batch_size):
                    # Get a new index
                    index = (index + 1) % nbr_files

                    # MUST be true (files must have the same name)
                    assert pathsplit(x_files[index])[-1] == pathsplit(
                        y_files[index])[-1]

                    x_img = img_to_array(load_img(x_files[index]))
                    y_img = img_to_array(load_img(y_files[index]))

                    # Resize each image
                    x_img, y_img = imresize(x_img,
                                            self.input_shape[:2]), imresize(
                                                y_img, self.input_shape[:2])
                    # Apply a transformation on these images
                    # x_img, y_img = transfromXY(x_img, y_img)

                    # Change y shape : (m, n, 3) -> (m, n, 2) (2 is the class number)
                    temp_y_img = np.zeros(self.input_shape[:2] + (1, ))
                    temp_y_img[y_img[:, :, 0] == 0] = 0
                    temp_y_img[y_img[:, :, 0] == 255] = 1
                    y_img = temp_y_img

                    # Convert to float
                    x_img = x_img.astype('float32')
                    y_img = y_img.astype('float32')
                    # Divide by the maximum value of each pixel
                    x_img /= 255
                    # Append images to the lists
                    x.append(x_img)
                    y.append(y_img)
                yield np.array(x), np.array(y)
Example #4
        def createGenerator(dir, batch_size=2):
            x_dir = join(dir, "x")
            y_dir = join(dir, "y")

            assert exists(x_dir)
            assert exists(y_dir)

            # FIX: glob.glob is waaaaay faster than [f for f in listdir() if isfile(f)]
            x_files = glob.glob(join(x_dir, "*.jpg")) + glob.glob(
                join(x_dir, "*.png"))
            y_files = glob.glob(join(y_dir, "*.jpg")) + glob.glob(
                join(y_dir, "*.png"))

            assert len(x_files) == len(y_files)

            while True:
                x, y = list(), list()
                for _ in range(batch_size):
                    # Get a random index between 0 and len(x_files)
                    index = randint(0, len(x_files) - 1)

                    # MUST be true (files must have the same name)
                    assert pathsplit(x_files[index])[-1] == pathsplit(
                        y_files[index])[-1]

                    x_img = img_to_array(load_img(x_files[index]))
                    y_img = img_to_array(load_img(y_files[index]))

                    # Resize each image
                    x_img, y_img = imresize(x_img,
                                            self.input_shape[:2]), imresize(
                                                y_img, self.input_shape[:2])
                    # Apply a transformation on these images
                    x_img, y_img = transfromXY(x_img, y_img)

                    # Change y shape : (m, n, 3) -> (m, n, 2) (2 is the class number)
                    temp_y_img = np.zeros(self.input_shape[:2] +
                                          (self.__nClasses, ))
                    temp_y_img[y_img[:, :, 1] != 255] = 0
                    temp_y_img[y_img[:, :, 1] == 255] = 1
                    y_img = temp_y_img

                    # Convert to float
                    x_img = x_img.astype('float32')
                    y_img = y_img.astype('float32')
                    # Divide by the maximum value of each pixel
                    x_img /= 255
                    # Append images to the lists
                    x.append(x_img)
                    y.append(y_img)
                yield np.array(x), np.array(y)
Example #5
 def samples(self):
     """
     Yield labeled samples from txt files.
     Labels are the parent directory of the file.
     """
     # this works in this case because we have one sample
     # per file
     n = 0
     n_samples = len(self.datafiles)
     for datafile in self.datafiles:
         label = pathsplit(dirname(datafile))[1]
         split = pathsplit(datafile)[0].split('/')[-2]
         with open(datafile, 'r') as f:
             sample = f.read()
             yield self.sample_class(sample, label, self.dataset, split)
Example #6
    def download_file(self, filename, file_size):
        if not self.connection:
            return None

        valid_filename = pathsplit(filename)[1]
        bytes_left = int(file_size)
        byteat = self.byteat

        if not valid_filename:
            return False

        print "Start receiving the file {} bytes".format(file_size),

        with open(join(UPLOAD_DIR, valid_filename), 'wb') as file:
            while bytes_left > 0:
                # Check how many bytes left.
                if byteat > bytes_left:
                    byteat = bytes_left

                data = self.connection.recv(byteat)
                file.write(data)
                bytes_left -= byteat
                # print ".",

        print "done."
        return True
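The `pathsplit(filename)[1]` call above keeps only the basename of the client-supplied name, so a name containing directory components cannot steer the write outside the upload directory. A self-contained sketch of that idea (the `UPLOAD_DIR` value here is an assumption for illustration):

from os.path import join, split as pathsplit

UPLOAD_DIR = "/tmp/uploads"  # assumed value for illustration

def safe_target(filename):
    # Drop any directory component the client may have sent along.
    valid_filename = pathsplit(filename)[1]
    return join(UPLOAD_DIR, valid_filename) if valid_filename else None

print(safe_target("../../etc/passwd"))  # /tmp/uploads/passwd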
Example #7
def _prep_file_under_git(path, filename):
    """Get instance of the repository for the given filename

    Helper to be used by few functions
    """
    if filename is None:
        # path provides the path and the name
        path, filename = pathsplit(path)
    try:
        # if succeeds when must not (not `annexed`) -- fail
        repo = get_repo_instance(path, class_=AnnexRepo)
        annex = True
    except RuntimeError as e:
        # TODO: make a dedicated Exception
        if "No annex repository found in" in str(e):
            repo = get_repo_instance(path, class_=GitRepo)
            annex = False
        else:
            raise

    # path to the file within the repository
    # repo.path is a "realpath" so to get relpath working correctly
    # we need to realpath our path as well
    path = op.realpath(path)  # intentional realpath to match GitRepo behavior
    file_repo_dir = op.relpath(path, repo.path)
    file_repo_path = filename if file_repo_dir == curdir else opj(
        file_repo_dir, filename)
    return annex, file_repo_path, filename, path, repo
Example #8
    def make_joblist(self):
        """Make a list of jobs with all variants for all infiles and create outfile directories.

        The in/out mapping is file.* > outdir/variant_name/provider/file.mp4."""

        def get_task_lock_file(out_filename):
            "Get task-lock filename."
            return "%s.X" % splitext(out_filename)[0]

        def get_logfile(out_filename):
            "Get logfile name"
            return "%s.log" % splitext(out_filename)[0]

        for infile in self.infiles:
            if not os.path.exists(infile):
                print "Warning: infile %s does not exist. Skipping it" % infile
                continue
            infile_base = splitext(pathsplit(infile)[1])[0]
            for variant in self.config:
                outdir = normpath(pathjoin(self.outdir, infile_base))
                if not os.path.exists(outdir):
                    os.makedirs(outdir)
                outfile = pathjoin(outdir, variant['name'] + '.mp4')
                taskfile = get_task_lock_file(outfile)
                logfile = get_logfile(outfile)
                if os.path.exists(taskfile) or (not os.path.exists(outfile)):
                    job = {'inFile' : infile, 'outFile' : outfile, 'lockFile' : taskfile,
                           'get_logfile' : logfile}
                    job.update(variant)
                    self.jobs.append(job)
                    if len(self.jobs) == self.max_jobs:
                        break
Example #9
def main():

    nei = NearestNeighbors()
    nei.fit(matrix)
    path = '{0}/'.format(pathsplit(abspath(__file__))[0])

    jsonfile = open(path + '{1}_rand-{0}-nn.json'.format(n_neighbors, name),
                    'w')

    nodes = [{'name': i, 'group': groups[i]} for i in range(len(matrix))]
    links = []

    for i in range(len(matrix)):
        dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]

        for j in range(len(idnei[1:])):
            links.append({
                "source": i,
                "target": idnei[j + 1],
                "value": 10 * (1 - dist[j + 1])
            })

    jsondumped = json.dumps({'nodes': nodes, 'links': links}, indent=2)

    jsonfile.write(jsondumped)
Example #10
 def _args(model, kwargs, create_parent=False):
     path = kwargs.pop('path', None)
     if path:
         try:
             user = kwargs['user']
         except KeyError:
             raise ValueError('`user` argument required with `path`')
         parent, name = pathsplit(path.lstrip('/'))
         parent = parent if parent else ''
         if name == '' and parent == '':
             # If both are empty, caller is asking for '/' or similar, which
             # cannot be a file.
             raise UserFile.DoesNotExist()
         elif parent == '':
             # If only parent is empty, caller wants a file within root.
             parent = UserDir.objects.get_root(user)
         else:
             # If neither are empty, caller wants a file with a directory.
             try:
                 parent = UserDir.objects.get(user=user, path=parent)
             except UserDir.DoesNotExist:
                 # Caller may want us to create parent dirs (-p). For
                 # example, during create().
                 if not create_parent:
                     # If not, raise.
                     raise UserFile.DoesNotExist()
                 parent = UserDir.objects.create(user=user, path=parent)
         # The caller provided a valid path consisting of a parent directory
         # and a name. Set kwargs for the query.
         kwargs['name'] = name
         kwargs['parent'] = parent
Example #11
def main():
    vectorizer = CountVectorizer(ngram_range=(1,2),max_df=1.0, min_df=0.0)

    nei = NearestNeighbors(algorithm='brute', metric='jaccard')
    matrix = vectorizer.fit_transform(training_set).todense()
    new_matrix = vectorizer.transform(new_comments).todense()
    nei.fit(matrix)
    path =  '{0}/'.format(pathsplit(abspath(__file__))[0])
    jsonfile = open(path + '{0}-nn.json'.format(n_neighbors), 'w')

    nodes = [{'name': (training_set+new_comments)[i],
              'group':(groups + new_groups)[i]}
             for i in range(len(training_set+new_comments))]
    links = []

    for i in range(len(matrix)):
        dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]

        for j in range(len(idnei[1:])):
            links.append({"source":i,"target":idnei[j+1],"value":10*(1 - dist[j+1])})

    for i in range(len(new_comments)):
        dist, idnei = nei.kneighbors(new_matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]
        for j in range(len(idnei[1:])):
            links.append({"source":len(matrix) + i,"target":idnei[j],"value":10*(1 - dist[j+1])})

    jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2)

    jsonfile.write(jsondumped)
Example #12
def datafile(filenames=None):
    if filenames is None:
        options = {
            'defaultextension':
            '.WVF',
            'filetypes': [('Yokogowa Data File', ('*.WDF', '*.WVF')),
                          ('Hierarchical Data Format (HDF5)', '.hdf5')],
            'initialdir':
            str(Path.home()),
            'title':
            'Pick WVF or HDF5 files to load'
        }
        filenames = tkWindow(askopenfilenames, options)

    if isinstance(filenames, str):
        filenames = [filenames]

    # splitext() keeps the leading dot in the extension, and lower() must be called
    if any([(splitext(filename)[1].lower() == '.wvf') |
            (splitext(filename)[1].lower() == '.wdf')
            for filename in filenames]):
        [
            wdf2wvf(filename) for filename in filenames
            if splitext(filename)[1].lower() == '.wdf'
        ]
        return IndexableDict({
            pathsplit(filename)[1].split('.')[0]: DataFile(filename)
            for filename in filenames
        })
    if ('hdf' in splitext(filenames[0])[1].lower()) | ('h5' in splitext(
            filenames[0])[1].lower()):
        return read_hdf5(filenames)
Example #13
def files_different(src, dest):
    from os.path import isfile, split as pathsplit, getsize, isdir

    srcparent, destparent = pathsplit(src)[0], pathsplit(dest)[0]

    # TODO: mtime?
    if not isdir(srcparent) or not isdir(destparent):
        return True
    if not isfile(src) or not isfile(dest):
        return True
    if getsize(src) != getsize(dest):
        return True

    if md5_file(src) != md5_file(dest):
        return True

    return False
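A quick way to exercise `files_different` is with a temporary file; the `md5_file` helper it calls is not shown in the excerpt, so the definition below is only a plausible stand-in:

import hashlib
import tempfile

def md5_file(path):
    # Plausible stand-in for the md5_file() helper used above.
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b'hello')
    name = f.name

print(files_different(name, name))          # False: same size and hash
print(files_different(name, name + '.x'))   # True: destination missing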
Example #14
 def current(self):
     try:
         l = readlink(self.link)
         prefix = self.name + '.'
         return pathsplit(l)[1].split(prefix, 2)[1]
     except:
         pass
     return None
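A standalone sketch of the same symlink pattern: a link named `<name>.current` points at `<name>.<version>`, and the version suffix is recovered from the link target (hypothetical names, POSIX-style filesystem assumed):

import tempfile
from os import readlink, symlink
from os.path import join, split as pathsplit

d = tempfile.mkdtemp()
target = join(d, 'app.2.1.0')
link = join(d, 'app.current')
open(target, 'w').close()
symlink(target, link)

prefix = 'app' + '.'
print(pathsplit(readlink(link))[1].split(prefix, 2)[1])  # 2.1.0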
Example #15
def test_divide_combi():

    import pydream2015
    indir = join(pathsplit(pydream2015.__file__)[0], 'test_input')
    outdir = join(pathsplit(pydream2015.__file__)[0], 'test_output')
    indir = os.path.abspath(indir)
    outdir = os.path.abspath(outdir)
    pydream2015.initdatapath(indir, outdir)

    therapy_traindata = pd.read_csv(pydream2015.DATA_COMBITHERAPY)
    trainids, testids = pydream2015.util.divide_combi(therapy_traindata,
                                                      ratio=0.3)

    print(len(trainids))
    print(len(testids))

    pass
Example #16
def files_different(src, dest):
    from os.path import isfile, split as pathsplit, getsize, isdir

    srcparent, destparent = pathsplit(src)[0], pathsplit(dest)[0]

    # TODO: mtime?
    if not isdir(srcparent) or not isdir(destparent):
        return True
    if not isfile(src) or not isfile(dest):
        return True
    if getsize(src) != getsize(dest):
        return True

    if md5_file(src) != md5_file(dest):
        return True

    return False
Example #17
def parts(path):
    """
    Split a path by the path separator (/)

    run doctests with the command:

    python -m doctest -v randdiff.py

    >>> parts('/')
    ('/',)
    >>> parts('2011')
    ('2011',)
    >>> parts('2011/')
    ('2011',)
    >>> parts('/2011')
    ('/', '2011')
    >>> parts('a/b')
    ('a', 'b')
    >>> parts('a/b/')
    ('a', 'b')
    >>> parts('/a/b')
    ('/', 'a', 'b')
    >>> parts('/a/b/')
    ('/', 'a', 'b')
    >>> pathjoin(*(parts('/')))
    '/'
    >>> pathjoin(*(parts('2011')))
    '2011'
    >>> pathjoin(*(parts('2011/')))
    '2011'
    >>> pathjoin(*(parts('/2011')))
    '/2011'
    >>> pathjoin(*(parts('a/b')))
    'a/b'
    >>> pathjoin(*(parts('a/b/')))
    'a/b'
    >>> pathjoin(*(parts('/a/b')))
    '/a/b'
    >>> pathjoin(*(parts('/a/b/')))
    '/a/b'
    >>>

    """
    (direc, base) = pathsplit(path)
    if direc == '':
        return (base,)

    if direc == '/':
        direc = (direc,)
    else:
        direc = parts(direc)

    if base == '':
        return direc
    else:
        return direc + (base,)
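With `parts` defined as above, a short round-trip check in the spirit of the doctests (assuming `pathjoin` is `os.path.join`, as the doctests do):

from os.path import join as pathjoin

for p in ('/', '2011/', '/a/b/', 'a/b'):
    pieces = parts(p)                    # the parts() defined above
    print(p, pieces, pathjoin(*pieces))  # e.g. /a/b/ ('/', 'a', 'b') /a/b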
Example #18
File: cpd.py Project: dot-home/cpd
def pathcomponents(path):
    ''' Given a path of '/'-separated components, return the components.
        This excludes empty components excepting one at the end if the
        path ends with '/'.
    '''
    prefix, component = pathsplit(path)
    if prefix not in ['', '/']:  # Fixed point
        return pathcomponents(prefix) + [component]
    else:
        return [component]
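With the recursive `pathcomponents` above in scope, a couple of calls show the trailing-slash behaviour the docstring describes:

from os.path import split as pathsplit  # pathcomponents() above relies on this import

print(pathcomponents('/usr/local/bin/python'))  # ['usr', 'local', 'bin', 'python']
print(pathcomponents('a/b/'))                   # ['a', 'b', '']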
Example #19
    def _getcaller(self):
        '''
        Grab the name, filename, and line number of the function that created
        this Timer.
        '''

        f = sys._getframe(2)
        caller_name = f.f_code.co_name
        filename = pathsplit(f.f_code.co_filename)[-1]
        linenumber = f.f_code.co_firstlineno
        self.called_from = '%s:%s:%s' % (filename, caller_name, linenumber)
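The same frame-inspection idea works outside a class; a small self-contained sketch with hypothetical function names:

import sys
from os.path import split as pathsplit

def where_called_from(depth=1):
    # file:function:first-line of the caller's frame.
    f = sys._getframe(depth)
    filename = pathsplit(f.f_code.co_filename)[-1]
    return '%s:%s:%s' % (filename, f.f_code.co_name, f.f_code.co_firstlineno)

def demo():
    print(where_called_from())  # e.g. script.py:demo:11

demo()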
Example #20
    def _getcaller(self):
        '''
        Grab the name, filename, and line number of the function that created
        this Timer.
        '''

        f = sys._getframe(2)
        caller_name = f.f_code.co_name
        filename = pathsplit(f.f_code.co_filename)[-1]
        linenumber = f.f_code.co_firstlineno
        self.called_from = '%s:%s:%s' % (filename, caller_name, linenumber)
Example #21
def smooth_mutation_NCI(nci, sig, alp, mat, vec, smoothed, overwrite=False,
        test=False):
    if exists(smoothed) and not overwrite:
        return

    result_dir = pathsplit(smoothed)[0]

    smooth_mutation_NCI_pre(nci, sig, mat, vec, test=test) 

    print '[smoothing]'
    print '>>', smoothed
    run_smoother(mat, vec, smoothed, alpha=alp, ncores=4, test=test)
Example #22
    def _build_attachment(self, full_path):

        file_name = pathsplit(full_path)[1]

        with open(full_path, "rb") as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload((attachment).read())
            encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename= {}'.format(file_name))

        return part
Example #23
 def _move_file(self, file, dst):
     if self.isdir(dst):
         raise DirectoryConflictError(dst)
     dst, file.name = pathsplit(dst.lstrip('/'))
     if dst:
         try:
             file.parent = \
                 UserDir.objects.get(path=dst, user=self.user)
         except UserDir.DoesNotExist:
             raise DirectoryNotFoundError(dst)
     file.save(update_fields=['parent', 'name'])
     return file
Example #24
def save_file_cache(obj, data, user=False):
    if not getattr(obj, '_disk_cacheable', True):
        return
    cache_path = get_obj_cache_path(obj, user)

    # Ensure that the location for the cache file exists.
    cache_head = pathsplit(cache_path)[0]
    if not pathexists(cache_head):
        os.makedirs(cache_head)

    # Pickle, compress, and write out.
    with file(cache_path, 'wb') as f:
        f.write(data)
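The directory-creation step above is a common idiom; isolated, it looks like this (hypothetical cache path):

import os
from os.path import exists as pathexists, split as pathsplit

def ensure_parent_dir(path):
    # Make sure the directory that will hold `path` exists.
    head = pathsplit(path)[0]
    if head and not pathexists(head):
        os.makedirs(head)

ensure_parent_dir('/tmp/cache/example/obj.pkl')  # hypothetical path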
Example #25
    def get_metadata(self, path):
        sidecarJSON = path.replace(".nii.gz", ".json").replace(".nii", ".json")
        path_components = pathsplit(sidecarJSON)
        filename_components = path_components[-1].split("_")
        ses = None
        suffix = filename_components[-1]

        sub = filename_components[0]
        keyword_components = filename_components[1:-1]
        if filename_components[1][:3] == "ses":
            ses = filename_components[1]
            keyword_components = filename_components[2:-1]

        potentialJSONs = []
        for prefixes, midlayer, conditional in (  # Levels
            (tuple(), tuple(), True),  # top
            ((sub, ), tuple(), True),  # subject
            ((sub, ), (pathsplit(path_components[-2])[-1], ), True),
            ((sub, ses), tuple(), ses),  # session
            ((sub, ses), (pathsplit(path_components[-2])[-1], ), ses)):
            if not conditional:
                continue
            for k in range(len(keyword_components) + 1):
                for components in combinations(keyword_components, k):
                    potentialJSONs.append(
                        pathjoin(
                            self.root,
                            *(prefixes + midlayer +
                              ("_".join(prefixes + components +
                                        (suffix, )), ))))

        merged_param_dict = {}
        for json_file_path in potentialJSONs:
            if os.path.exists(json_file_path):
                param_dict = json.load(open(json_file_path, "r"))
                merged_param_dict.update(param_dict)

        return merged_param_dict
Example #26
def run(cmd, verbose=False, dry_run=False):
    """
    Run system command.
  """
    LANGUAGE = "en"
    # Show "live" command output in verbose mode
    command_name = pathsplit(abspath(cmd.strip().split(' ')[0]))[1]
    command = "env LANGUAGE=%s %s" % (LANGUAGE, cmd)
    if verbose:
        print " INFO - Run `%s`..." % command
    # Debug mode or not, we print a nice formatted output of the command
    if not dry_run:
        result = getstatusoutput(command)
        nice_log(log=result[1], cmd_name=command_name)
Example #27
def run(cmd, verbose=False, dry_run=False):
  """
    Run system command.
  """
  LANGUAGE = "en"
  # Show "live" command output in verbose mode
  command_name = pathsplit(abspath(cmd.strip().split(' ')[0]))[1]
  command = "env LANGUAGE=%s %s" % (LANGUAGE, cmd)
  if verbose:
    print " INFO - Run `%s`..." % command
  # Debug mode or not, we print a nice formatted output of the command
  if not dry_run:
    result = getstatusoutput(command)
    nice_log(log=result[1], cmd_name=command_name)
Example #28
    def __init__(self, json_path):
        with open(json_path, 'r', encoding='utf-8') as f:
            unicode_safe = f.read()
        self.json = json.loads(unicode_safe)
        self.info = self.json['info']
        self.images = self.json['images']
        try:
            self.options = self.json['options']
        except KeyError:
            self.options = []

        self.all_files = [
        ]  # Dicts are not a hashable type, so they need a list
        self.all_filenames = set()
        self.new_files = []

        for i in self.images:
            try:
                floppy_files = i['floppy']['files']
                for ff in floppy_files:
                    try:
                        _ = ff['new_file']
                        self.new_files.append(ff)
                    except KeyError:
                        self.all_files.append(ff)
                        self.all_filenames.add(ff['name'])
            except KeyError:
                hdd_files = i['hdd']['files']
                for hf in hdd_files:
                    try:
                        _ = hf['new_file']
                        self.new_files.append(hf)
                    except KeyError:
                        self.all_files.append(hf)
                        self.all_filenames.add(hf['name'])
        self.all_filenames = list(self.all_filenames)

        self.patch_dir = pathjoin(pathsplit(json_path)[0], 'patch')

        # TODO: What other stuff do I need easier access to?
        if not self._validate_config():
            # TODO: Need a more specific message of what is wrong with it.
            message_wait_close(
                "A config option in %s is not supported by this verison of Pachy98. Download a newer version."
                % selected_config)
        try:
            self._validate_patch_existence()
        except FileNotFoundError as e:
            message_wait_close(
                "This config references a patch %s that doesn't exist." % e)
Example #29
File: sdjas.py Project: rjolly/jas
def _uri_to_name(uri):
    """
    Converts a uri to a name or key by only taking everything
    after the last / or (if present) #.

    Examples:
     - http://example.com/test             ->   test
     - http://example.com/model#testedBy   ->   testedBy
    """
    usplit = urlsplit(uri)
    if usplit.fragment != '':
        return usplit.fragment
    else:
        return pathsplit(usplit.path)[-1]
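With `_uri_to_name` above in scope, the two documented cases behave as described (using the Python 3 `urllib.parse` names here; the original module's imports are not shown in the excerpt):

from urllib.parse import urlsplit
from os.path import split as pathsplit

print(_uri_to_name('http://example.com/test'))            # test
print(_uri_to_name('http://example.com/model#testedBy'))  # testedBy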
Example #30
def splitall(path):
    allparts = []
    while 1:
        parts = pathsplit(path)
        if parts[0] == path:  # sentinel for absolute paths
            allparts.insert(0, parts[0])
            break
        elif parts[1] == path:  # sentinel for relative paths
            allparts.insert(0, parts[1])
            break
        else:
            path = parts[0]
            allparts.insert(0, parts[1])
    return allparts
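`splitall` is the classic recipe for exploding a path into every component, including the root; with the function above in scope (POSIX-style separators):

from os.path import split as pathsplit  # splitall() above relies on this import

print(splitall('/a/b/c'))   # ['/', 'a', 'b', 'c']
print(splitall('a/b/c'))    # ['a', 'b', 'c']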
Example #31
def _uri_to_name(uri):
    """
    Converts a uri to a name or key by only taking everything
    after the last / or (if present) #.

    Examples:
     - http://example.com/test             ->   test
     - http://example.com/model#testedBy   ->   testedBy
    """
    usplit = urlsplit(uri)
    if usplit.fragment != '':
        return usplit.fragment
    else:
        return pathsplit(usplit.path)[-1]
Example #32
File: url.py Project: eteq/pyvo
def url_sibling(url, sibling):
    """
    Replaces the last path element in an url

    Parameters
    ----------
    url : str
        The url for which the last path element should be replaced
    sibling : str
        The replace value
    """
    parsed = urlparse(url)
    newpath_segments = pathsplit(parsed.path)[:-1] + (sibling,)
    newpath = pathjoin(*newpath_segments)
    return urlunparse(list(parsed[:2]) + [newpath] + list(parsed[3:]))
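With `url_sibling` above in scope, replacing the last path element looks like the call below (it relies on `urlparse`, `urlunparse`, `pathsplit`, and `pathjoin` imports not shown in the excerpt; the Python 3 names are assumed here):

from os.path import join as pathjoin, split as pathsplit
from urllib.parse import urlparse, urlunparse  # pyvo's own imports are not shown

print(url_sibling('http://example.com/tables/main', 'capabilities'))
# http://example.com/tables/capabilities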
Example #33
    def __init__(self, filename=None, verbose=False):
        if filename is None:
            return
        else:
            self.filename = filename
            file_info, trace_info_array = self.read_hdr()
            [setattr(self, k, v) for k, v in file_info.items()]

            self.traces = IndexableDict({})

            for info in trace_info_array:
                self.traces[info['name']] = Trace(
                    {key: value
                     for key, value in zip(info.dtype.names, info)}, self)

            self.date = file_info['date']
            self.name = pathsplit(filename)[1].split('.')[0]
Example #34
def batchunit(inpfile, outfile, with_small=False, overwrite=False):

    jsonstr = open(inpfile).readlines()
    jsonstr = "\n".join(jsonstr)

    hash_object = hashlib.md5(jsonstr.encode())
    _cwd = os.getcwd()
    inpfile = join(_cwd, inpfile)
    outfile = join(_cwd, outfile)
    workdir = join('datafiles', hash_object.hexdigest())

    if not exists(workdir):
        os.makedirs(workdir)

    os.chdir(workdir)
    mixedmodelmain(inpfile, outfile, with_small, overwrite)

    tgzdata = pathsplit(outfile)[1].split('.')[0] + '_data.tar.gz'
    os.system('tar -cvzf ../../%s *' % tgzdata)
Example #35
def mailinglist_analyse(resdir, mldir, codeface_conf, project_conf, loglevel,
                        logfile, jobs, mailinglists, use_corpus):
    conf = Configuration.load(codeface_conf, project_conf)
    ml_resdir = pathjoin(resdir, conf["project"], "ml")

    exe = abspath(resource_filename(__name__, "R/ml/batch.r"))
    cwd, _ = pathsplit(exe)
    cmd = []
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(jobs)))
    if (use_corpus):
        cmd.append("--use-corpus")
    cmd.append(ml_resdir)
    cmd.append(mldir)
    if not mailinglists:
        mailinglist_conf = conf["mailinglists"]
    else:
        mailinglist_conf = []
        for mln in mailinglists:
            match = [ml for ml in conf["mailinglists"] if ml["name"] == mln]
            if not match:
                log.fatal("Mailinglist '{}' not listed in configuration file!".
                          format(ml))
                raise Exception("Unknown mailing list")
            if len(match) > 1:
                log.fatal(
                    "Mailinglist '{}' specified twice in configuration file!".
                    format(mln))
                raise Exception("Invalid config file")
            mailinglist_conf.append(match[0])

    for i, ml in enumerate(mailinglist_conf):
        log.info(
            "=> Analysing mailing list '{name}' of type '{type}'".format(**ml))
        logargs = []
        if logfile:
            logargs = ["--logfile", "{}.R.ml.{}".format(logfile, i)]
        execute_command([exe] + logargs + cmd + [ml["name"]],
                        direct_io=True,
                        cwd=cwd)
    log.info("=> Codeface mailing list analysis complete!")
Example #36
def sociotechnical_analyse(resdir, codeface_conf, project_conf, loglevel,
                           logfile, n_jobs):
    conf = Configuration.load(codeface_conf, project_conf)
    project_resdir = pathjoin(resdir, conf["project"])

    exe = abspath(resource_filename(__name__, "R/sociotechnical.r"))
    cwd, _ = pathsplit(exe)
    cmd = [exe]
    if logfile:
        cmd.extend(("--logfile", "{}.R.sociotechnical".format(logfile)))
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(n_jobs)))
    cmd.append(project_resdir)

    log.info("=> Performing socio-technical analysis")
    execute_command(cmd, direct_io=True, cwd=cwd)
    generate_report_st(pathjoin(resdir, conf["project"], "st"))
    log.info("=> Codeface socio-technical analysis complete!")
Example #37
def mailinglist_analyse(resdir, mldir, codeface_conf, project_conf, loglevel,
                        logfile, jobs, mailinglists, use_corpus):
    conf = Configuration.load(codeface_conf, project_conf)
    ml_resdir = pathjoin(resdir, conf["project"], "ml")

    exe = abspath(resource_filename(__name__, "R/ml/batch.r"))
    cwd, _ = pathsplit(exe)
    cmd = []
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(jobs)))
    if (use_corpus):
        cmd.append("--use-corpus")
    cmd.append(ml_resdir)
    cmd.append(mldir)
    if not mailinglists:
        mailinglist_conf = conf["mailinglists"]
    else:
        mailinglist_conf = []
        for mln in mailinglists:
            match = [ml for ml in conf["mailinglists"] if ml["name"] == mln]
            if not match:
                log.fatal("Mailinglist '{}' not listed in configuration file!".
                    format(ml))
                raise Exception("Unknown mailing list")
            if len(match) > 1:
                log.fatal("Mailinglist '{}' specified twice in configuration file!".
                    format(ml))
                raise Exception("Invalid config file")
            mailinglist_conf.append(match[0])

    for i, ml in enumerate(mailinglist_conf):
        log.info("=> Analysing mailing list '{name}' of type '{type}'".
                format(**ml))
        logargs = []
        if logfile:
            logargs = ["--logfile", "{}.R.ml.{}".format(logfile, i)]
        execute_command([exe] + logargs + cmd + [ml["name"]],
                direct_io=True, cwd=cwd)
    log.info("=> Codeface mailing list analysis complete!")
Example #38
def main():
    vectorizer = CountVectorizer(ngram_range=(1, 2), max_df=1.0, min_df=0.0)

    nei = NearestNeighbors(algorithm='brute', metric='jaccard')
    matrix = vectorizer.fit_transform(training_set).todense()
    new_matrix = vectorizer.transform(new_comments).todense()
    nei.fit(matrix)
    path = '{0}/'.format(pathsplit(abspath(__file__))[0])
    jsonfile = open(path + '{0}-nn.json'.format(n_neighbors), 'w')

    nodes = [{
        'name': (training_set + new_comments)[i],
        'group': (groups + new_groups)[i]
    } for i in range(len(training_set + new_comments))]
    links = []

    for i in range(len(matrix)):
        dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]

        for j in range(len(idnei[1:])):
            links.append({
                "source": i,
                "target": idnei[j + 1],
                "value": 10 * (1 - dist[j + 1])
            })

    for i in range(len(new_comments)):
        dist, idnei = nei.kneighbors(new_matrix[i],
                                     n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]
        for j in range(len(idnei[1:])):
            links.append({
                "source": len(matrix) + i,
                "target": idnei[j],
                "value": 10 * (1 - dist[j + 1])
            })

    jsondumped = json.dumps({'nodes': nodes, 'links': links}, indent=2)

    jsonfile.write(jsondumped)
Example #39
def main():

    nei = NearestNeighbors(metric='euclidean')
    nei.fit(matrix)
    path =  '{0}/'.format(pathsplit(abspath(__file__))[0])

    jsonfile = open(path + '{1}_rand-{0}-nn.json'.format(n_neighbors, name), 'w')

    nodes = [{'name': i,
              'group':groups[i]}
             for i in range(len(matrix))]
    links = []

    for i in range(len(matrix)):
        dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]

        for j in range(len(idnei[1:])):
            links.append({"source":i,"target":idnei[j+1],"value":10*(1 - dist[j+1])})

    jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2)

    jsonfile.write(jsondumped)
Example #40
 def split_extra(self, path, extra_info='', nodir=1):
     """
     if given a path will extra path info, will
     return (path of existing file, extra path info)
     if some part of the path represents an existing file, and
     ('', '') if there is no file.  If nodir is true,
     the file must not be a directory unless extra_info=='', or
     the return value will be ('', '').
     """
     
     if path=='':
         return ('', '')
     if self.exists(path):
         if nodir and self.isdir(path) and extra_info!='':
             return ('', '')
         return path, extra_info
     else:
         p, e=pathsplit(path)
         if extra_info:
             ex='%s/%s' % (e, extra_info)
         else:
             ex=e
         return self.split_extra(p, ex, nodir)
Example #41
def _prep_file_under_git(path, filename):
    """Get instance of the repository for the given filename

    Helper to be used by few functions
    """
    if filename is None:
        # path provides the path and the name
        path, filename = pathsplit(path)
    try:
        # if succeeds when must not (not `annexed`) -- fail
        repo = get_repo_instance(path, class_=AnnexRepo)
        annex = True
    except RuntimeError as e:
        # TODO: make a dedicated Exception
        if "No annex repository found in" in str(e):
            repo = get_repo_instance(path, class_=GitRepo)
            annex = False
        else:
            raise

    # path to the file within the repository
    file_repo_dir = os.path.relpath(path, repo.path)
    file_repo_path = filename if file_repo_dir == curdir else opj(file_repo_dir, filename)
    return annex, file_repo_path, filename, path, repo
Example #42
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from os.path import realpath, split as pathsplit
from os.path import join as pathjoin

DATA_FILE = realpath(pathjoin(pathsplit(__file__)[0], 'dorms.txt'))
DATA_ENC = 'utf-8-sig'

SEGTYPE_FIELD = u'segtype'
# now the parsing of individual parts uses dynamic dispatch
SEGTYPE_UNIFORM = u'uniform'
SEGTYPE_SPECIAL = u'special'
# lambdas are used to prevent scoping error
SEGTYPE_DISPATCH = {SEGTYPE_UNIFORM: lambda a, b, c: parse_uniform(a, b, c),
                    SEGTYPE_SPECIAL: lambda a, b, c: parse_special(a, b, c),
                    }

IPPREFIX_FIELD = u'ipprefix'
IP_BEGIN_FIELD = u'ipsubstart'
IP_STEP_FIELD = u'ipstep'
APARTMENT_START_FIELD = u'apartmentstart'
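The `pathsplit(__file__)[0]` idiom used to build `DATA_FILE` above takes the directory part of the module's own path; it is equivalent to `os.path.dirname`. A small sketch with a hypothetical path:

from os.path import dirname, split as pathsplit

module_path = '/some/package/dorms_config.py'   # hypothetical module path
assert pathsplit(module_path)[0] == dirname(module_path)
print(pathsplit(module_path)[0])                # /some/package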
Example #43
def project_analyse(resdir, gitdir, codeface_conf, project_conf,
                    no_report, loglevel, logfile, recreate, profile_r,
                    n_jobs, tagging_type, reuse_db):
    pool = BatchJobPool(int(n_jobs))
    conf = Configuration.load(codeface_conf, project_conf)
    tagging = conf["tagging"]
    if tagging_type != "default":

        if tagging_type not in LinkType.get_all_link_types():
            log.critical('Unsupported tagging mechanism specified!')
            raise ConfigurationError('Unsupported tagging mechanism.')
        # we override the configuration value
        if tagging != tagging_type:
            log.warn(
                "tagging value is overwritten to {0} because of --tagging"
                .format(tagging_type))
            tagging = tagging_type
            conf["tagging"] = tagging

    project = conf["project"]
    repo = pathjoin(gitdir, conf["repo"], ".git")
    project_resdir = pathjoin(resdir, project, tagging)
    range_by_date = False

    # When revisions are not provided by the configuration file
    # generate the analysis window automatically
    if len(conf["revisions"]) < 2:
        window_size_months = 3 # Window size in months
        num_window = -1  # Number of ranges to analyse, -1 captures all ranges
        revs, rcs = generate_analysis_windows(repo, window_size_months)
        conf["revisions"] = revs[-num_window-1:]
        conf["rcs"] = rcs[-num_window-1:]
        range_by_date = True

    # TODO: Sanity checks (ensure that git repo dir exists)
    if tagging == LinkType.proximity:
        check4ctags()
    elif tagging in (LinkType.feature, LinkType.feature_file):
        check4cppstats()

    project_id, dbm, all_range_ids = project_setup(conf, recreate)

    ## Save configuration file
    conf.write()
    project_conf = conf.get_conf_file_loc()

    # Analyse new revision ranges
    for i, range_id in enumerate(all_range_ids):
        start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id)
        range_resdir = pathjoin(project_resdir, "{0}-{1}".
                format(start_rev, end_rev))
        prefix = "  -> Revision range {0}..{1}: ".format(start_rev, end_rev)

        #######
        # STAGE 1: Commit analysis
        s1 = pool.add(
                doProjectAnalysis,
                (conf, start_rev, end_rev, rc_rev, range_resdir, repo,
                    reuse_db, True, range_by_date),
                startmsg=prefix + "Analysing commits...",
                endmsg=prefix + "Commit analysis done."
            )

        #########
        # STAGE 2: Cluster analysis
        exe = abspath(resource_filename(__name__, "R/cluster/persons.r"))
        cwd, _ = pathsplit(exe)
        cmd = []
        cmd.append(exe)
        cmd.extend(("--loglevel", loglevel))
        if logfile:
            cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i)))
        cmd.extend(("-c", codeface_conf))
        cmd.extend(("-p", project_conf))
        cmd.append(range_resdir)
        cmd.append(str(range_id))

        s2 = pool.add(
                execute_command,
                (cmd,),
                {"direct_io":True, "cwd":cwd},
                deps=[s1],
                startmsg=prefix + "Detecting clusters...",
                endmsg=prefix + "Detecting clusters done."
            )

        #########
        # STAGE 3: Generate cluster graphs
        if not no_report:
            pool.add(
                    generate_reports,
                    (start_rev, end_rev, range_resdir),
                    deps=[s2],
                    startmsg=prefix + "Generating reports...",
                    endmsg=prefix + "Report generation done."
                )

    # Wait until all batch jobs are finished
    pool.join()

    #########
    # Global stage 1: Time series generation
    log.info("=> Preparing time series data")
    dispatch_ts_analysis(project_resdir, conf)

    #########
    # Global stage 2: Complexity analysis
    ## NOTE: We rely on proper timestamps, so we can only run
    ## after time series generation
    log.info("=> Performing complexity analysis")
    for i, range_id in enumerate(all_range_ids):
        log.info("  -> Analysing range {}".format(range_id))
        exe = abspath(resource_filename(__name__, "R/complexity.r"))
        cwd, _ = pathsplit(exe)
        cmd = [exe]
        if logfile:
            cmd.extend(("--logfile", "{}.R.complexity.{}".format(logfile, i)))
        cmd.extend(("--loglevel", loglevel))
        cmd.extend(("-c", codeface_conf))
        cmd.extend(("-p", project_conf))
        cmd.extend(("-j", str(n_jobs)))
        cmd.append(repo)
        cmd.append(str(range_id))
        execute_command(cmd, direct_io=True, cwd=cwd)

    #########
    # Global stage 3: Time series analysis
    log.info("=> Analysing time series")
    exe = abspath(resource_filename(__name__, "R/analyse_ts.r"))
    cwd, _ = pathsplit(exe)
    cmd = [exe]
    if profile_r:
        cmd.append("--profile")
    if logfile:
        cmd.extend(("--logfile", "{}.R.ts".format(logfile)))
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(n_jobs)))
    cmd.append(project_resdir)
    execute_command(cmd, direct_io=True, cwd=cwd)
    log.info("=> Codeface run complete!")
Example #44
def project_analyse(resdir, gitdir, codeface_conf, project_conf,
                    no_report, loglevel, logfile, recreate, profile_r, n_jobs):
    pool = BatchJobPool(int(n_jobs))
    conf = Configuration.load(codeface_conf, project_conf)
    project, tagging = conf["project"], conf["tagging"]
    repo = pathjoin(gitdir, conf["repo"], ".git")
    project_resdir = pathjoin(resdir, project, tagging)

    # When revisions are not provided by the configuration file
    # generate the analysis window automatically
    if len(conf["revisions"]) < 2:
        window_size_months = 3  # Window size in months
        conf["revisions"], conf["rcs"] = generate_analysis_windows(repo, window_size_months)

    # TODO: Sanity checks (ensure that git repo dir exists)
    if 'proximity' == conf["tagging"]:
        check4ctags()

    project_id, dbm, all_range_ids = project_setup(conf, recreate)

    ## Save configuration file
    conf.write()
    project_conf = conf.get_conf_file_loc()

    # Analyse new revision ranges
    for i, range_id in enumerate(all_range_ids):
        start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id)
        range_resdir = pathjoin(project_resdir, "{0}-{1}".
                format(start_rev, end_rev))
        prefix = "  -> Revision range {0}..{1}: ".format(start_rev, end_rev)

        #######
        # STAGE 1: Commit analysis
        s1 = pool.add(
                doProjectAnalysis,
                (conf, start_rev, end_rev, rc_rev, range_resdir, repo,
                    True, True),
                startmsg=prefix + "Analysing commits...",
                endmsg=prefix + "Commit analysis done."
            )

        #########
        # STAGE 2: Cluster analysis
        exe = abspath(resource_filename(__name__, "R/cluster/persons.r"))
        cwd, _ = pathsplit(exe)
        cmd = []
        cmd.append(exe)
        cmd.extend(("--loglevel", loglevel))
        if logfile:
            cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i)))
        cmd.extend(("-c", codeface_conf))
        cmd.extend(("-p", project_conf))
        cmd.append(range_resdir)
        cmd.append(str(range_id))

        s2 = pool.add(
                execute_command,
                (cmd,),
                {"direct_io":True, "cwd":cwd},
                deps=[s1],
                startmsg=prefix + "Detecting clusters...",
                endmsg=prefix + "Detecting clusters done."
            )

        #########
        # STAGE 3: Generate cluster graphs
        if not no_report:
            pool.add(
                    generate_reports,
                    (start_rev, end_rev, range_resdir),
                    deps=[s2],
                    startmsg=prefix + "Generating reports...",
                    endmsg=prefix + "Report generation done."
                )

    # Wait until all batch jobs are finished
    pool.join()

    #########
    # Global stage 1: Time series generation
    log.info("=> Preparing time series data")
    dispatch_ts_analysis(project_resdir, conf)

    #########
    # Global stage 2: Complexity analysis
    ## NOTE: We rely on proper timestamps, so we can only run
    ## after time series generation
    log.info("=> Performing complexity analysis")
    for i, range_id in enumerate(all_range_ids):
        log.info("  -> Analysing range {}".format(range_id))
        exe = abspath(resource_filename(__name__, "R/complexity.r"))
        cwd, _ = pathsplit(exe)
        cmd = [exe]
        if logfile:
            cmd.extend(("--logfile", "{}.R.complexity.{}".format(logfile, i)))
        cmd.extend(("--loglevel", loglevel))
        cmd.extend(("-c", codeface_conf))
        cmd.extend(("-p", project_conf))
        cmd.extend(("-j", str(n_jobs)))
        cmd.append(repo)
        cmd.append(str(range_id))
        execute_command(cmd, direct_io=True, cwd=cwd)

    #########
    # Global stage 3: Time series analysis
    log.info("=> Analysing time series")
    exe = abspath(resource_filename(__name__, "R/analyse_ts.r"))
    cwd, _ = pathsplit(exe)
    cmd = [exe]
    if profile_r:
        cmd.append("--profile")
    if logfile:
        cmd.extend(("--logfile", "{}.R.ts".format(logfile)))
    cmd.extend(("--loglevel", loglevel))
    cmd.extend(("-c", codeface_conf))
    cmd.extend(("-p", project_conf))
    cmd.extend(("-j", str(n_jobs)))
    cmd.append(project_resdir)
    execute_command(cmd, direct_io=True, cwd=cwd)
    log.info("=> Prosoda run complete!")
Example #45
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from __future__ import unicode_literals, division

from os.path import realpath, split as pathsplit
from os.path import join as pathjoin

from cPickle import loads

from kbspasswd import kbs_encode

DATA_FILE = realpath(pathjoin(pathsplit(__file__)[0], 'PASSWDS.pickle'))


def build_dict():
    with open(DATA_FILE, 'rb') as fp:
        content = fp.read()

    return loads(content)


_dict = build_dict()


def chkpasswd(uid, psw):
    return _dict[uid] == kbs_encode(uid, psw)
Example #46
def conway_analyse(resdir, gitdir, titandir, codeface_conf, project_conf,
                   loglevel, logfile, jobs):
    conf = Configuration.load(codeface_conf, project_conf)

    log.info("Performing conway analysis")

    pool = BatchJobPool(int(jobs))
    conf = Configuration.load(codeface_conf, project_conf)
    project = conf["project"]
    repo = pathjoin(gitdir, conf["repo"], ".git")
    project_resdir = pathjoin(resdir, project, "conway")
    range_by_date = False

    if conf["tagging"] != "proximity":
        log.error("Conway analysis requires analysis in proximity mode!")
        return

    # Set defaults for the various analysis choices if they are not explicitly
    # given in the configuration file
    if "artifactType" not in conf.keys():
        conf["artifactType"] = "file"
        log.info("Conway analysis: No artefact type given, defaulting to 'file'")

    if "dependencyType" not in conf.keys():
        conf["dependencyType"] = "none"
        log.info("Conway analysis: No dependency type given, defaulting to 'none'")

    if "qualityType" not in conf.keys():
        conf["qualityType"] = "corrective"
        log.info("Conway analysis: No quality type given, defaulting to 'corrective'")

    if "communicationType" not in conf.keys():
        conf["communicationType"] = "mail"
        log.info("Conway analysis: No communication type given, defaulting to 'mail'")

    if conf["communicationType"] == "jira" and \
        (("issueTrackerType" in conf.keys() and conf["issueTrackerType"] != "jira") or \
        not("issueTrackerType" in conf.keys())):
        log.info("Conway analysis configuration requires jira bugtracking information, exiting")
        return

    # When revisions are not provided by the configuration file
    # generate the analysis window automatically
    if len(conf["revisions"]) < 2:
        window_size_months, num_window = get_analysis_windows(conf)
        revs, rcs, dates = generate_analysis_windows(repo, window_size_months)
        conf["revisions"] = revs[-num_window-1:]
        conf["rcs"] = rcs[-num_window-1:]
        range_by_date = True

    project_id, dbm, all_range_ids = project_setup(conf, False)

    ## Save modified configuration file to a temporary location
    conf.write()
    project_conf = conf.get_conf_file_loc()

    # Global stage: Download and process JIRA issues
    if conf["communicationType"] == "jira":
        log.info("=> Downloading and processing JIRA issues")
        dispatch_jira_processing(project_resdir, titandir, conf)

    # Revision range specific analysis
    for i, range_id in enumerate(all_range_ids):
        start_rev, end_rev, rc_rev = dbm.get_release_range(project_id, range_id)
        start_date = dbm.get_commit_cdate(project_id, start_rev)
        end_date = dbm.get_commit_cdate(project_id, end_rev)
        range_resdir = gen_range_path(project_resdir, i+1, start_rev, end_rev)
        prefix = gen_prefix(i+1, len(all_range_ids), start_rev, end_rev)

        #######
        # STAGE 1:
        s1 = pool.add(
                parseCommitLoC,
                (conf, dbm, project_id, range_id, start_rev, end_rev,
                 range_resdir, repo),
                startmsg=prefix + "Computing file/developer relations...",
                endmsg=prefix + "Computing file/developer relations done."
            )

        #########
        # STAGE 2: Connect commits and jira issues
        if "communicationType" in conf.keys() and conf["communicationType"] == "jira":
            exe = abspath(resource_filename(__name__, "R/conway_metrics.r"))
            cwd, _ = pathsplit(exe)
            cmd = []
            cmd.append(exe)
            cmd.extend(("--loglevel", loglevel))
            if logfile:
                cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i)))
            cmd.extend(("-c", codeface_conf))
            cmd.extend(("-p", project_conf))
            cmd.append(project_resdir)
            cmd.append(range_resdir)

            s2 = pool.add(
                execute_command,
                (cmd,),
                {"direct_io":True, "cwd":cwd},
                deps=[s1],
                startmsg=prefix + "Connecting commits and issues...",
                endmsg=prefix + "Connecting commits and issues done."
                )


        #######
        # STAGE 3: Obtain SDSM using Titan
        if "dependencyType" in conf.keys() and conf["dependencyType"] == "dsm":
            exe = abspath(resource_filename(__name__, "R/titan.r"))
            cwd, _ = pathsplit(exe)
            cmd = []
            cmd.append(exe)
            cmd.extend(("--loglevel", loglevel))
            if logfile:
                cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i)))
            cmd.extend(("-c", codeface_conf))
            cmd.extend(("-p", project_conf))
            cmd.append(repo)
            cmd.append(range_resdir)
            cmd.append(titandir)
            cmd.append(end_rev)

            s3 = pool.add(
                execute_command,
                (cmd,),
                {"direct_io":True, "cwd":cwd},
                deps=[],
                startmsg=prefix + "Inferring architectural metrics with Titan...",
                endmsg=prefix + "Titan run done."
                )

        #########
        # STAGE 4: Perform socio-technical analysis
        exe = abspath(resource_filename(__name__, "R/socio_technical_analysis.r"))
        cwd, _ = pathsplit(exe)
        cmd = []
        cmd.append(exe)
        cmd.extend(("--loglevel", loglevel))
        if logfile:
            cmd.extend(("--logfile", "{}.R.r{}".format(logfile, i)))
        cmd.extend(("-c", codeface_conf))
        cmd.extend(("-p", project_conf))
        cmd.append(project_resdir)
        cmd.append(range_resdir)
        cmd.append(start_date)
        cmd.append(end_date)
        if "dependencyType" in conf.keys() and conf["dependencyType"] == "dsm":
            deps=[s3]
        else:
            deps=[]

        s4 = pool.add(
                execute_command,
                (cmd,),
                {"direct_io":True, "cwd":cwd},
                deps=deps,
                startmsg=prefix + "Performing socio-technical analysis...",
                endmsg=prefix + "Socio-technical analysis done."
            )

    # Wait until all batch jobs are finished
    pool.join()
    log.info("=> Codeface conway analysis complete!")
Example #47
    def __enter__(self):
        '''
        This function is called when entering a with statement.
        Here a real git repository is created and populated with the
        information in the project.
        '''
        self.directory = mkdtemp(prefix="codeface_test_project")
        cwd = getcwd()
        try:
            chdir(self.directory)
            def git(cmds, committer=None, commitdate=None):
                if committer or commitdate:
                    env = dict(environ)
                    if committer:
                        env["GIT_COMMITTER_NAME"] = committer.name
                        env["GIT_COMMITTER_EMAIL"] = committer.email
                    if commitdate:
                        env["GIT_COMMITTER_DATE"] = commitdate
                    check_call(["git"] + cmds, env=env)
                else:
                    check_call(["git"] + cmds)

            git(["init"])
            next_release = 0
            next_rc = 0
            release_tags = []
            rc_tags = {}
            for i, c in enumerate(self._commits):
                # First, clear the directory
                for f in listdir("."):
                    if f != ".git":
                        if isdir(f):
                            rmtree(f)
                        else:
                            unlink(f)
                # Insert the files specified in the commits filetree
                for f, content in c.filetree.iteritems():
                    dn, fn = pathsplit(f)
                    if dn and not exists(dn):
                        makedirs(dn)
                    with file(f, "w") as fd:
                        fd.write(content)
                # Perform the commit
                git("add -A .".split())
                commitmsg = "Commit {}\n\nCommit message\n\n".format(i)
                for signer in c.signoff:
                    commitmsg += "Signed-off-by: {}\n".format(str(signer))
                git(["commit",
                     "--author", str(c.author),
                     "--date", c.datetime,
                     "-m", commitmsg],
                    committer=c.committer, commitdate=c.datetime)
                # Tag the commit
                for tag in c.tags:
                    name = "v{}_{}".format(next_release, tag.type)
                    if tag.type == "rc":
                        name += "_{}".format(next_rc)
                        rc_tags.setdefault(next_release, name) # do not overwrite first rc
                        next_rc += 1
                    elif tag.type == "release":
                        release_tags.append(name)
                        next_release += 1
                    git(["tag", name])
            # Create codeface test configuration
            configuration = dedent("""
            ---
            project: {project}
            repo: {project} # Relative to git-dir as specified on the command line
            description: {project} Description
            mailinglists:
                -   name: {project}.dev1
                    type: dev
                    source: generated
                -   name: {project}.dev2
                    type: dev
                    source: generated
                -   name: {project}.user1
                    type: user
                    source: generated
                -   name: {project}.user2
                    type: user
                    source: generated
            revisions: {release_tags}
            rcs : {rctags}
            tagging: {tagging}
            """.format(release_tags=str(release_tags),
                       tagging=self._tagging,
                       rctags=str([rc_tags.get(i, release_tags[i]) for i in range(len(release_tags))]),
                       project=basename(self.directory)
                )
            )
            with file(self.codeface_conf, "w") as fd:
                fd.write(configuration)
            for ml_name, ml_file in self.mboxes:
                with file(ml_file, "w") as fd:
                    fd.write(self.mbox_contents(ml_name))
        finally:
            chdir(cwd)
Example #48
DEBUG_POSTFIX = '_d' if DEBUG else ''

USE_DEVENV = not VS_EXPRESS # devenv.exe does not come with the free compiler



makepath = lambda *p: normpath(pathjoin(abspath(p[0]), *p[1:]))

if USE_PYTHON_26:
    #
    # Python 2.6 Trunk
    #
    PYTHON_SVN_REVISION = '72606'
    PYTHON_SVN_URL = '%s/python/branches/release26-maint@%s' % (PYTHON_PROJECTS_SVN, PYTHON_SVN_REVISION)
    PCBUILD_DIR = 'PCBuild'
    PYTHON_LIBDIR  =  normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR))
    PYTHON_PGO_DIR = 'Win32-pgo'
    PYTHON_PGI_LIBDIR  =  normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR, 'Win32-pgi'))
    PYTHON_PGO_LIBDIR  =  normpath(pathjoin(abspath(pathsplit(__file__)[0]), PYTHON_DIR, PCBUILD_DIR, PYTHON_PGO_DIR))
    PYTHON_EXE     =  normpath(pathjoin(PYTHON_LIBDIR, r'python%s.exe' % DEBUG_POSTFIX))
    PYTHON_EXE_PGI =  normpath(pathjoin(PYTHON_PGI_LIBDIR, r'python.exe'))
    PYTHON_EXE_PGO =  normpath(pathjoin(PYTHON_PGO_LIBDIR, r'python.exe'))
    PYTHON_VER = '26'
    PYTHON_BZIP = ('bzip2-1.0.5', '%s/external/bzip2-1.0.5' % PYTHON_PROJECTS_SVN)
    PYTHON_SQLITE = ('sqlite-3.5.9', '%s/external/sqlite-3.5.9/' % PYTHON_PROJECTS_SVN)
else:
    #
    # Python 2.5 Maintenance Branch
    #
    PYTHON_SVN_REVISION = 'HEAD'
    PYTHON_SVN_URL = '%s/python/branches/release25-maint@%s' % (PYTHON_PROJECTS_SVN, PYTHON_SVN_REVISION)
Example #49
#!/usr/bin/env python

from os import system, listdir
from os.path import join as pathjoin, split as pathsplit, dirname, abspath
from sys import path as pythonpath, argv, exit

#Establish some useful global variables.
#
#We assume that this program is located in the root directory of
#the instance of the pub package that we are testing.  Given
#that assumption, we find everything else by building paths.
#relative to that root.
#
pubdir = abspath(dirname(argv[0]))
PYTHONPATH = pathsplit(pubdir)[0]
pythonpath.insert(0, PYTHONPATH)
testdir = pathjoin(pubdir, 'test')
gamesdir = pathjoin(pubdir, 'games')

#This routine will run a particular test game and diff the output.
def runtest(name, gamepath, makeoutput):
    inputfile = pathjoin(testdir, '%s-input' % name)
    outputfile = pathjoin(testdir, '%s-output' % name)
    if makeoutput: testfile = outputfile
    else: testfile = pathjoin(testdir, '%s-testout' % name)
    system('PYTHONPATH=%s PUBTESTING=true python pubrun %s <%s >%s' % (PYTHONPATH,
        gamepath, inputfile, testfile))
    system('rm pub.dat')
    if makeoutput: return
    system("sed -e 's/0x.*c//' %s > diff1" % testfile)
    system("sed -e 's/0x.*c//' %s > diff2" % outputfile)