Example #1
File: template.py Project: mrcjhurst/evoke
  def __call__(self, fn):
    ""
#    klass = rx.split(inspect.stack()[1][3])[1].lower()
    k=inspect.stack()[1][3]
#    print ">>>>>>>>>>>>>>>>>",k,fn.func_name
    klass = rx.split(k)[1] #we assume consistent capitalization of class / object names
#    print ">>>>klass>>>>>>>>",klass
#    print ">>>>self>>>>>>>>",self
    fname = fn.func_name

    css = []
    csspath = '../htdocs/site/%s_%s.css' % (klass, fname)
    if lexists(csspath):
      css.append('/site/%s_%s.css' % (klass, fname))
    csspath = '../htdocs/site/%s.css' % klass
    if lexists(csspath):
      css.append('/site/%s.css' % klass)

    name = '%s_%s.xml' % (klass, fname)
    template = Render(name, *self.a, **self.k)
    def function(inner_self, req):
      "a typical template"
      req['css'] = css
      req['meta'] = '%s_%d_%s' % (klass, getattr(inner_self, 'uid', 0), fname)
      fn(inner_self, req)
      return template(inner_self, req)
    return function
Example #2
File: test_link.py Project: ESSS/conda
    def test_soft_link(self):
        path1_real_file = join(self.test_dir, 'path1_real_file')
        path2_symlink = join(self.test_dir, 'path2_symlink')
        touch(path1_real_file)
        assert isfile(path1_real_file)
        assert not islink(path1_real_file)

        symlink(path1_real_file, path2_symlink)
        assert exists(path2_symlink)
        assert lexists(path2_symlink)
        assert islink(path2_symlink)

        assert readlink(path2_symlink).endswith(path1_real_file)
        # for win py27, readlink actually gives something that starts with \??\
        # \??\c:\users\appveyor\appdata\local\temp\1\c571cb0c\path1_real_file

        assert stat_nlink(path1_real_file) == stat_nlink(path2_symlink) == 1

        os.unlink(path1_real_file)
        assert not isfile(path1_real_file)
        assert not lexists(path1_real_file)
        assert not exists(path1_real_file)

        assert lexists(path2_symlink)
        if not (on_win and PY2):
            # I guess I'm not surprised this exist vs lexist is different for win py2
            #   consider adding a fix in the future
            assert not exists(path2_symlink)

        os.unlink(path2_symlink)
        assert not lexists(path2_symlink)
        assert not exists(path2_symlink)
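
The behaviour this test pins down can be reproduced with nothing but the standard library: exists() follows symlinks, while lexists() only checks that the link entry itself is present. A minimal standalone sketch (temporary paths, not from the conda test suite; assumes a platform where os.symlink is permitted):

import os
import tempfile
from os.path import exists, lexists

tmp = tempfile.mkdtemp()
target = os.path.join(tmp, 'real_file')
link = os.path.join(tmp, 'link_to_file')

open(target, 'w').close()      # create the real file
os.symlink(target, link)       # and a symlink pointing at it
assert exists(link) and lexists(link)

os.unlink(target)              # break the link by removing its target
assert not exists(link)        # exists() follows the link: now False
assert lexists(link)           # lexists() only checks the link entry: still True
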
Example #3
 def create_payloads(self):
     '''
     Create all missing data payloads in current directory
     Doesn't compute md5 during creation because tarball can
     be created manually
     Also create symlink to versionned payload
     '''
     arrow("Creating payloads")
     for payload_name in self.select_payloads():
         paydesc = self.describe_payload(payload_name)
         if exists(paydesc["link_path"]):
             continue
         arrow(payload_name, 1)
         try:
             # create non versionned payload file
             if not exists(paydesc["dest_path"]):
                 if paydesc["isdir"]:
                     self.create_payload_tarball(paydesc["dest_path"],
                                                 paydesc["source_path"],
                                                 paydesc["compressor"])
                 else:
                     self.create_payload_file(paydesc["dest_path"],
                                              paydesc["source_path"],
                                              paydesc["compressor"])
             # create versionned payload file
             if lexists(paydesc["link_path"]):
                 unlink(paydesc["link_path"])
             symlink(paydesc["dest_path"], paydesc["link_path"])
         except Exception as e:
             # cleaning file in case of error
             if exists(paydesc["dest_path"]):
                 unlink(paydesc["dest_path"])
             if lexists(paydesc["link_path"]):
                 unlink(paydesc["link_path"])
             raise ISError(u"Unable to create payload %s" % payload_name, e)
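
The "if lexists(link): unlink(link)" followed by symlink(...) sequence in the try block recurs in several later examples (#13, #30). A condensed sketch of just that pattern, using a hypothetical helper name not taken from the project:

import os
from os.path import lexists

def replace_symlink(target, link_path):
    """Point link_path at target, replacing whatever is currently there
    (an illustrative helper, not part of the project above)."""
    if lexists(link_path):
        # lexists() rather than exists(): a dangling symlink left behind by
        # an aborted earlier run must also be removed, or symlink() would
        # fail with EEXIST.
        os.remove(link_path)
    os.symlink(target, link_path)
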
Example #4
File: create.py Project: ESSS/conda
def create_link(src, dst, link_type=LinkType.hardlink, force=False):
    if link_type == LinkType.directory:
        # A directory is technically not a link.  So link_type is a misnomer.
        #   Naming is hard.
        mkdir_p(dst)
        return

    if not lexists(src):
        raise CondaError("Cannot link a source that does not exist. %s" % src)

    if lexists(dst):
        if not force:
            maybe_raise(BasicClobberError(src, dst, context), context)
        log.info("file exists, but clobbering: %r" % dst)
        rm_rf(dst)

    if link_type == LinkType.hardlink:
        if isdir(src):
            raise CondaError("Cannot hard link a directory. %s" % src)
        link(src, dst)
    elif link_type == LinkType.softlink:
        _do_softlink(src, dst)
    elif link_type == LinkType.copy:
        _do_copy(src, dst)
    else:
        raise CondaError("Did not expect linktype=%r" % link_type)
Example #5
def main(argv):
    EXCLUDES.append(path.join('.', path.basename(argv[0])))
    target = os.environ['HOME']
    target_prefix = '.'

    opts = argv[1:]
    extras = '--extras' in opts

    dolink('.', target, target_prefix, excludes=[
        path.join('.', 'bin')
    ])
    mkdir(path.join(target, 'bin'))
    dolink('bin', path.join(target, 'bin'))

    # pull in hgexts
    hgexts = path.join(target, '.hgexts')
    mkdir(hgexts)
    if not path.lexists(path.join(hgexts, 'hg-git')):
        system('hg clone ssh://[email protected]/durin42/hg-git', hgexts)
    if not path.lexists(path.join(hgexts, 'hg-remotebranches')):
        system('hg clone ssh://[email protected]/durin42/hg-remotebranches', hgexts)

    # pull in sandboxes
    sandbox = path.join(target, 'sandbox')
    mkdir(sandbox)
    if not path.lexists(path.join(sandbox, 'mercurial-cli-templates')):
        system('hg clone ssh://[email protected]/sjl/mercurial-cli-templates/', sandbox)

    return 0
Example #6
def dolink(dirpath, target, target_prefix='', excludes=None):
    for fn in sorted(os.listdir(dirpath)):
        localfn = path.join(dirpath, fn)
        if localfn in EXCLUDES:
            continue
        if excludes and localfn in excludes:
            continue

        targetfn = path.join(target, target_prefix + fn)
        localfnabs = path.abspath(localfn)
        if path.isdir(localfn):
            if localfn in MKDIR_INSTEADOF_LINK:
                mkdir(targetfn)
                dolink(localfn, targetfn)

            else:
                if path.lexists(targetfn):
                    if not (path.islink(targetfn) \
                       and os.readlink(targetfn) == localfnabs):
                        warn('exists: diff -u %s %s' % (targetfn, localfn))
                else:
                    os.symlink(localfnabs, targetfn)

        else:
            if path.lexists(targetfn):
                if not (path.islink(targetfn) \
                   and os.readlink(targetfn) == localfnabs):
                    warn('exists: diff -u %s %s' % (targetfn, localfn))
            else:
                os.symlink(localfnabs, targetfn)
Example #7
    def trash(self):
        if islink(self.path):
            self.remove()
            return True

        elif exists(self.path):
            base    = basename(self.path)
            target  = base
            ftarget = join(expanduser("~/.Trash"), target)
            index   = 1

            while lexists(ftarget):
                target = "%s-%d" % (base, index)
                index += 1
                ftarget = join(expanduser("~/.Trash"), target)

            try:
                l.debug("Calling: os.rename('%s', '%s')" % (self.path, ftarget))
                if not self.dryrun:
                    os.rename(self.path, ftarget)
            except:
                if self.sudo:
                    try:
                        run('sudo /bin/mv %%s "%s"' % ftarget,
                            self.path, self.dryrun)
                    except:
                        l.error("Error moving file with sudo: %s" % self)

            if self.dryrun or not lexists(self.path):
                return True
            else:
                l.error("Could not trash file: %s\n" % self)

        return False
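
The name-collision loop in the middle (probe ~/.Trash with lexists(), appending -1, -2, ... until the name is free) is reusable on its own. A small sketch with an invented helper name:

from os.path import join, lexists, expanduser, basename

def unique_trash_name(path, trash_dir=expanduser("~/.Trash")):
    # Same idea as the while loop above: keep appending an index until no
    # directory entry (not even a dangling symlink) uses the candidate name.
    base = basename(path)
    candidate = join(trash_dir, base)
    index = 1
    while lexists(candidate):
        candidate = join(trash_dir, "%s-%d" % (base, index))
        index += 1
    return candidate
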
Example #8
File: create.py Project: rlugojr/conda
def create_link(src, dst, link_type=LinkType.hardlink, force=False):
    if link_type == LinkType.directory:
        # A directory is technically not a link.  So link_type is a misnomer.
        #   Naming is hard.
        mkdir_p(dst)
        return

    if not lexists(src):
        raise CondaError("Cannot link a source that does not exist. %s" % src)

    if lexists(dst):
        if not force:
            maybe_raise(BasicClobberError(src, dst, context), context)
        log.info("file exists, but clobbering: %r" % dst)
        rm_rf(dst)

    if link_type == LinkType.hardlink:
        if isdir(src):
            raise CondaError("Cannot hard link a directory. %s" % src)
        link(src, dst)
    elif link_type == LinkType.softlink:
        symlink(src, dst)
    elif link_type == LinkType.copy:
        # on unix, make sure relative symlinks stay symlinks
        if not on_win and islink(src):
            src_points_to = readlink(src)
            if not src_points_to.startswith('/'):
                # copy relative symlinks as symlinks
                symlink(src_points_to, dst)
                return
        shutil.copy2(src, dst)
    else:
        raise CondaError("Did not expect linktype=%r" % link_type)
Example #9
def _truncate_spaces(
        nshell, n1, n2, dirpaths, scalefactor, remove_protons, force=False):
    """For multiple directories, perform the operation of truncate_space
    :param nshell: major oscillator shell (0=s, 1=p, ...)
    :param n1: max allowed one-particle state
    :param n2: max allowed two-particle state
    :param dirpaths: Paths to the destination directories
    """
    dirpaths = list(dirpaths)
    d0 = dirpaths.pop()
    # truncate interaction once
    fpath0, lpath0 = _truncate_space(
        nshell=nshell, n1=n1, n2=n2, dpath_elt=d0, scalefactor=scalefactor,
        remove_protons=remove_protons, force=force,
    )
    fname_tbme = path.split(fpath0)[1]
    lname_tbme = path.split(lpath0)[1]
    # link truncated interaction file to the rest of the directories
    for d in dirpaths:
        dst_path = path.join(d, fname_tbme)
        sl_path = path.join(d, lname_tbme)
        try:
            if path.exists(sl_path) or path.lexists(sl_path):  # symlink exists
                remove(sl_path)
            if not (path.exists(dst_path) or path.lexists(dst_path)):
                link(fpath0, dst_path)
            elif force:
                remove(dst_path)
                link(fpath0, dst_path)
        except OSError:
            print 'Could not link %s to %s.' % (fpath0, dst_path)
            raise
        symlink(dst_path, sl_path)
    return fname_tbme, lname_tbme
Example #10
  def reload(self,force=False):
    "reload the template, if it is not in the cache, or the timestamp doesn't match"
    cob=self.cache.get(self.key,CacheObject())
    if (cob.path and not force):#is it in the cache?
      if (cob.timestamp == os.stat(cob.path).st_mtime):# does the timestamp match?
#        print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MATCHED",self.key
        self.path=cob.path
        self.wrapperpath=cob.wrapperpath
        if hasattr(cob,'template'):
          self.template=cob.template
          self.saxevents=cob.saxevents
        return False # the correct template objects are now copied from cache
    else:  #ie do this first time only.. get the paths  (we could not do this before now, because we didn't have the base and app filepaths)
      cob.path=self.app_filepath+self.filename#is there local template?
      if not lexists(cob.path):
        cob.path=self.base_filepath+self.filename# use the base template
      cob.wrapperpath=self.app_filepath+self.wrapper# is there a local wrapper?
      if not lexists(cob.wrapperpath):
        cob.wrapperpath=self.base_filepath+self.wrapper# use the base wrapper
#    print "WRAPPERPATH=",self.wrapperpath,self.app_filepath,self.wrapper
#    print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>RELOAD>>",self.key
    self.path=cob.path
    self.wrapperpath=cob.wrapperpath
    self.template=cob.template= self.wrapTemplate(self.path, self.wrapperpath)
    self.include(self.template.childNodes[0])
    self.timestamp=cob.timestamp=os.stat(cob.path).st_mtime            
    self.cache[self.key]=cob #cache the template
    self.cob=cob #we need this in render.py for sax data
    # return true to indicate reload has happened
    return True
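
The caching idea here (reuse a parsed template unless the file's mtime changed, preferring an app-level override when one exists) can be shown in isolation. A minimal sketch with made-up names, not evoke's actual classes:

import os
from os.path import lexists

_cache = {}  # path -> (mtime, parsed_template)

def load_template(app_path, base_path, parse):
    # prefer the app-level override if an entry (even a symlink) exists there
    path = app_path if lexists(app_path) else base_path
    mtime = os.stat(path).st_mtime
    cached = _cache.get(path)
    if cached and cached[0] == mtime:
        return cached[1]            # timestamp matches: reuse the cached object
    template = parse(path)          # otherwise (re)parse and refresh the cache
    _cache[path] = (mtime, template)
    return template
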
Example #11
 def _installFile(self,source,target,removeSource=True,symlink=False):
     """Copy, move or create symlink source file to target. Save old."""
     def getLinkForTarget(target):
         """Get symlinks from target dirname which point to target"""
         baseDir = path.dirname(path.normpath(target))
         linkFiles = filter(path.islink,map(lambda x:path.join(baseDir,x),
                                            os.listdir(baseDir)))
         return filter(lambda x:path.join(baseDir,
                                   os.readlink(x))==target, linkFiles)
     # raise IOError if source is not exists
     open(source,'r').close()
     targetLinkFiles = getLinkForTarget(target)
     oldtarget = ""
     if path.lexists(target):
         oldtarget = "%s.old" % target
         if path.lexists(oldtarget):
             oldTargetLinkFiles = getLinkForTarget(oldtarget)
             map(os.unlink,oldTargetLinkFiles)
             os.unlink(oldtarget)
         os.rename(target,oldtarget)
     if symlink:
         if path.dirname(source)==path.dirname(target):
             os.symlink(path.basename(source),target)
         else:
             os.symlink(source,target)
     elif removeSource:
         os.rename(source,target)
     else:
         copy_with_perm(source,target)
     if oldtarget:
         map(os.unlink,targetLinkFiles)
         map(lambda x:os.symlink(path.basename(oldtarget),x),targetLinkFiles)
Example #12
def copyfilesfromsvn(filelist,svnbasepath,localpath,revision,samefold=False):
    shutil.rmtree(localpath,True)
    if not path.lexists(localpath):
        os.makedirs(localpath)
    os.chdir(localpath)
    prefix = r'/IMClient-RV/'
    prefix2 = r'/revolution_min/'
    prefix3 = r'/Private/'
    haserr = False
    for onefile in filelist:
        pos = onefile.find(prefix)
        if pos==-1:
            pos = onefile.find(prefix2)
            if pos == -1:
                pos = onefile.find(prefix3)
                if pos == -1:
                    continue
        if samefold:
            localfilepath = localpath + path.basename(onefile)
            if path.lexists(localfilepath):
                localfilepath = localfilepath + '-dup'
            cmdstr = 'svn export --force -r '+ str(revision) + ' ' + svnbasepath + onefile + ' ' + localfilepath
        else:
            localfilepath = localpath + onefile.replace(onefile[:pos], '', 1)
            if not path.lexists(path.dirname(localfilepath)):
                os.makedirs(path.dirname(localfilepath))
            cmdstr = 'svn export --force -r '+ str(revision) + ' ' + svnbasepath + onefile + ' ' + localfilepath
        try:
            retcode = subprocess.call(cmdstr,shell=True)
            if retcode != 0:
                haserr = True
        except OSError,e:
            haserr = True
Example #13
File: __init__.py Project: mdavezac/LaDa
def prep_symlink(outdir, workdir, filename=None):
  """ Creates a symlink between outdir and workdir.

      If outdir and workdir are the same directory, then bails out.
      Both directories should exist prior to call.
      If filename is None, then creates a symlink to workdir in outdir called
      ``workdir``. Otherwise, creates a symlink in workdir called filename.
      If a link ``filename`` already exists, deletes it first.
  """
  from os import remove, symlink
  from os.path import samefile, lexists, abspath, join
  from ..misc import Changedir
  if samefile(outdir, workdir): return
  if filename is None:
    with Changedir(workdir) as cwd: 
      if lexists('workdir'):
        try: remove('workdir')
        except OSError: pass
      try: symlink(abspath(workdir), abspath(join(outdir, 'workdir')))
      except OSError: pass
    return

  with Changedir(workdir) as cwd: 
    if lexists(filename):
      try: remove(filename)
      except OSError: pass
    try: symlink( abspath(join(outdir, filename)),
                  abspath(join(workdir, filename)) )
    except OSError: pass
Example #14
File: all.py Project: ActiveState/applib
def test_sh_rm_broken_symlink():
    with sh.tmpdir():
        os.symlink('afile-notexist', 'alink')
        assert not path.exists('alink')
        assert path.lexists('alink')
        sh.rm('alink')
        assert not path.lexists('alink')
Example #15
def copyfilesfromlocal(filelist,localsrcpath,localdestpath,samefold=False):
    shutil.rmtree(localdestpath,True)
    if not path.lexists(localdestpath):
        os.makedirs(localdestpath)
    os.chdir(localdestpath)
    prefix = r'/IMClient-RV/'
    prefix2 = r'/revolution_min/'
    prefix3 = r'/Private/'
    #prefix4 = '/modules/'
    #prefix = '/IMClient/Branches_tb/20110325_Base6.6002C_security'
    #prefix2 = '/IMClient/Branches_tb/20110420_Base6.6003C_security2'
    for onefile in filelist:
        pos = onefile.find(prefix)
        if pos==-1:
            pos = onefile.find(prefix2)
            if pos == -1:
                pos = onefile.find(prefix3)
                if pos == -1:
                    continue
        localfilepath = onefile.replace(onefile[:pos],'',1)
        try:
            if samefold:
                copydest = localdestpath + path.basename(localfilepath)
                if path.lexists(copydest):
                    copydest = copydest + '-dup'
                shutil.copy2(localsrcpath+localfilepath,copydest)
            else:
                copydest = localdestpath+localfilepath
                if not path.lexists(path.dirname(copydest)):
                    os.makedirs(path.dirname(copydest))
                shutil.copy2(localsrcpath+localfilepath,copydest)
        except IOError,e:
            pass
Example #16
    def test_simple_LinkPathAction_softlink(self):
        source_full_path = make_test_file(self.pkgs_dir)
        target_short_path = source_short_path = basename(source_full_path)

        correct_sha256 = compute_sha256sum(source_full_path)
        correct_size_in_bytes = getsize(source_full_path)
        path_type = PathType.hardlink

        source_path_data = PathDataV1(
            _path = source_short_path,
            path_type=path_type,
            sha256=correct_sha256,
            size_in_bytes=correct_size_in_bytes,
        )

        axn = LinkPathAction({}, None, self.pkgs_dir, source_short_path, self.prefix,
                             target_short_path, LinkType.softlink, source_path_data)

        assert axn.target_full_path == join(self.prefix, target_short_path)
        axn.verify()
        axn.execute()
        assert isfile(axn.target_full_path)
        assert islink(axn.target_full_path)
        assert stat_nlink(axn.target_full_path) == 1

        axn.reverse()
        assert not lexists(axn.target_full_path)
        assert lexists(source_full_path)
Example #17
File: role.py Project: mehulsbhatt/conpaas
    def mount(self, mkfs):
        self.state = S_STARTING
        devnull_fd = open(devnull,'w')
        # waiting for our block device to be available
        dev_found = False
        dev_prefix = self.dev_name.split('/')[2][:-1]

        for attempt in range(1, 11):
            sql_logger.info("Galera node waiting for block device %s" % self.dev_name)
            if lexists(self.dev_name):
                dev_found = True
                break
            else:
                # On EC2 the device name gets changed 
                # from /dev/sd[a-z] to /dev/xvd[a-z]
                if lexists(self.dev_name.replace(dev_prefix, 'xvd')):
                    dev_found = True
                    self.dev_name = self.dev_name.replace(dev_prefix, 'xvd')
                    break

            time.sleep(10)

        # create mount point
        run_cmd(self.mkdir_cmd)

        if dev_found:
            sql_logger.info("Galera node has now access to %s" % self.dev_name)

            # prepare block device
            if mkfs:
                sql_logger.info("Creating new file system on %s" % self.dev_name)
                self.prepare_args = ['mkfs.ext4', '-q', '-m0', self.dev_name]
                proc = Popen(self.prepare_args, stdin=PIPE, stdout=devnull_fd,
                        stderr=devnull_fd, close_fds=True)

                proc.communicate(input="y") # answer interactive question with y
                if proc.wait() != 0:
                    sql_logger.critical('Failed to prepare storage device:(code=%d)' %
                            proc.returncode)
                else:
                    sql_logger.info('File system created successfully')
            else:
                sql_logger.info(
                  "Not creating a new file system on %s" % self.dev_name)
                time.sleep(10)

            # mount
            self.mount_args = ['mount', self.dev_name, self.mount_point]
            mount_cmd = ' '.join(self.mount_args)
            sql_logger.debug("Running command '%s'" % mount_cmd)
            _, err = run_cmd(mount_cmd)

            if err:
                sql_logger.critical('Failed to mount storage device: %s' % err)
            else:
                sql_logger.info("OSD node has prepared and mounted %s" % self.dev_name)
        else:
            sql_logger.critical("Block device %s unavailable, falling back to image space" 
                    % self.dev_name)
Example #18
File: all.py Project: ActiveState/applib
def test_sh_rm_symlink():
    with sh.tmpdir():
        with open('afile', 'w') as f: f.close()
        assert path.exists('afile')
        os.symlink('afile', 'alink')
        assert path.lexists('alink')
        sh.rm('alink')
        assert not path.lexists('alink')
Example #19
File: addon.py Project: molandtoxx/odooenv
 def is_saned(self, environment):
     """
     Return true if the addon is saned.
     """
     path = self.environment_path(environment)
     return (lexists(path) and
             os.path.exists(realpath(path)) or
             not lexists(path))
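
The return expression reads correctly only because "and" binds tighter than "or". Spelled out with explicit parentheses, the intent is: the addon is sane when nothing exists at the path at all, or when the entry that does exist resolves to a real target (it is not a dangling symlink). A restatement of the same logic as a plain function:

import os
from os.path import lexists, realpath

def is_sane(path):
    # equivalent grouping of the expression above
    return (lexists(path) and os.path.exists(realpath(path))) or not lexists(path)
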
Example #20
File: update.py Project: ESSS/conda
def rename(source_path, destination_path, force=False):
    if lexists(destination_path) and force:
        rm_rf(destination_path)
    if lexists(source_path):
        log.trace("renaming %s => %s", source_path, destination_path)
        os_rename(source_path, destination_path)
    else:
        log.trace("cannot rename; source path does not exist '%s'", source_path)
Example #21
File: ahrd.py Project: radaniba/jcvi
def batch(args):
    """
    %prog batch splits output

    The arguments are two folders.
    Input FASTA sequences are in splits/.
    Output csv files are in output/.

    Must have folders swissprot/, tair/, trembl/ that contains the respective
    BLAST output. Once finished, you can run, for example:

    $ parallel java -Xmx2g -jar ~/code/AHRD/dist/ahrd.jar {} ::: output/*.yml
    """
    p = OptionParser(batch.__doc__)

    ahrd_weights = { "blastp": [0.5, 0.3, 0.2],
                     "blastx": [0.6, 0.4, 0.0]
                   }
    blast_progs = tuple(ahrd_weights.keys())

    p.add_option("--path", default="~/code/AHRD/",
                 help="Path where AHRD is installed [default: %default]")
    p.add_option("--blastprog", default="blastp", choices=blast_progs,
                help="Specify the blast program being run. Based on this option," \
                   + " the AHRD parameters (score_weights) will be modified." \
                   + " [default: %default]")
    p.add_option("--iprscan", default=None,
                help="Specify path to InterProScan results file if available." \
                   + " If specified, the yml conf file will be modified" \
                   + " appropriately. [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    splits, output = args
    mkdir(output)

    bit_score, db_score, ovl_score = ahrd_weights[opts.blastprog]

    for f in glob("{0}/*.fasta".format(splits)):
        fb = op.basename(f).rsplit(".", 1)[0]
        fw = open(op.join(output, fb + ".yml"), "w")

        path = op.expanduser(opts.path)
        dir = op.join(path, "test/resources")
        outfile = op.join(output, fb + ".csv")
        interpro = iprscanTemplate.format(opts.iprscan) if opts.iprscan else ""

        print >> fw, Template.format(dir, fb, f, outfile, bit_score, db_score, ovl_score, interpro)

    if opts.iprscan:
        if not op.lexists("interpro.xml"):
            symlink(op.join(iprscan_datadir, "interpro.xml"), "interpro.xml")

        if not op.lexists("interpro.dtd"):
            symlink(op.join(iprscan_datadir, "interpro.dtd"), "interpro.dtd")
Example #22
File: rm.py Project: joseph8th/pybrat
    def run_command(self, args):
        # rm venv 'project' ONLY
        if args.brew:
            vname = args.project
            print "Deleting given pythonbrew virtualenv '{}' ONLY...".format(vname)
            # check if target venv is linked in pybrat project
            proj_d = get_project_list({'pybrat': True,})
            for proj in proj_d.keys():
                if vname in proj_d[proj]['venv'].keys():
                    pv_proj_dir = proj_d[proj]['srcpath']
                    print "Venv '{0}' is being used by project '{1}'.".format(vname, proj)
                    print "Deleting it will break a link unless you delete the link too."
                    if get_input_bool("Cleanup the pybrat project as well? [y/N] ",
                                      default_answer=False):
                        # rm venv and cleanup project
                        if not pb_rmvenv(vname, pv_proj_dir):
                            print "Remove Error: {} was not deleted".format(args.project)
                            return False
                        return True

            # venv not in any pybrat project so no worries...
            if not pb_rmvenv(vname):
                print "Remove Error: {} was not deleted".format(args.project)
                return False
            return True


        # still here? then delete the rest of the pybrat project...
        pv_projd = join(PYBRAT_PROJD, args.project)

        if not lexists(pv_projd):
            print "Warning: {} does not exist".format(pv_projd)
            return False

        print "==> Removing project '{}'...".format(args.project)

        # if 'pythonbrew venv delete project'...?
        if args.venv:
            print "Deleting linked virtualenv(s)..." 
            if not pv_rmvenv(pv_projd):
                print "Remove Error: {} was not deleted".format(pv_projd)
                return False

        # remove .pybrat dir in user's project dir
        pv_subd = join(readlink(pv_projd), ".{}".format(PYBRAT_PROG))

        if not lexists(pv_subd):
            print "Warning: {} does not exist.".format(pv_subd)
        else:
            rmtree(pv_subd, ignore_errors=True)
            print "Removed .pybrat subdirectory {}".format(pv_subd)

        # remove pybrat project link in .pybrat_projects/
        remove(pv_projd)
        print "Removed pybrat project {}".format(pv_projd)

        # all done deleting shit? ok...
        return True
Example #23
File: Project.py Project: spirineta/protk
    def checkRequiredFiles(self):
        '''makes sure textgrid files exist for each wav file, and that a config file exists'''
        #check for config file, and load it.  Make it part of options.
        #load config file in the 2 required formats
        self.options.config = {}
        self.loadConfig()
        self.options.praatConfig = []
        self.loadPraatConfig()
        
        #for truth and each other directory:
            #make sure the directories exist
            #check for txtgrid files
            
        #TRUTH
        if not path.lexists(self.options.directory + self.options.truth + "/"):
            print "error, the",self.options.truth,"directory doesn't exist (it's where your 'truth' files should be)! Exiting."
            exit()

        toExtract = self.options.source
        if toExtract:
            for folder in toExtract:
                if not path.isdir(self.options.directory + folder + "/"):     
                    print "error, the",self.options.directory + folder + "/","directory doesn't exist! You listed it as a folder to process. Exiting."
                    exit()
                for fname in self.fileList:
                    if not path.isfile(self.options.directory + folder + "/" + fname + ".txtgrid"):
                        print "error, the required",fname,".txtgrid file doesn't exist in ./",folder,"/"
                        print "exiting."
                        exit()
                    if not self.options.extractPraat and not path.isdir(self.options.directory + 'formatted/' + fname + '_formatted/'):
                        print "You first need to extract prosodic information with Praat.  Re-run with the -x flag."
                        exit()

        #check to make sure directories to be cleaned exist
        #if self.options.cleanOldResults:
        #    for folder in self.options.cleanOldResults:
        #        if not path.isdir(self.options.directory + "extracted/" + folder + "/"):
        #            print "error, the",folder,"directory doesn't exist! Exiting."
        #            exit()
                
                
        praatscriptpath = os.path.abspath(__file__)
        s = praatscriptpath.split('/')
        praatscriptpath = '/' + '/'.join(s[:-1]) +   "/extractInfoSingle.praat"
        self.options.praatscriptpath = praatscriptpath
        #make sure praat script is there
        if not path.isfile(praatscriptpath):
            print "error, the praat script isn't in the same directory as the rest of the package files! Printing file list and exiting."
            print os.system("ls")
            print path.isfile(praatscriptpath)
            print self.options.praatscriptpath
            exit()
            
        #folder for db files
        if not path.lexists(self.options.directory + "db/"):
            mkdir(self.options.directory + "db/")
        if not path.lexists(self.options.directory + "arff/"):
            mkdir(self.options.directory + "arff/")
Example #24
File: evo.py Project: howiemac/evoke
 def __call__(self, ob, req, wrapper='', gettext=lambda x: x):
     """ generate HTML
 - allow for multiple apps having different template versions within each Evo instance - we only get the app data at call time
 - wrapper can be passed as req.wrapper or as wrapper: wrapper=None means no wrapper
 """
     self.gettext = gettext
     #get the template path for this app
     self.key = ob.Config.app + '.' + self.filename
     self.path = self.pathcache.get(self.key, "")
     if not self.path:  # first time only.. get the paths
         # note: we couldn't do this earlier, as we lacked the evoke and app filepaths
         # firstly: use the local class template, if there is one
         klass = self.filename.split("_", 1)[0]
         self.path = '%s%s/evo/%s' % (ob.Config.app_filepath, klass,
                                      self.filename)
         # otherwise, is there a local template?
         if not lexists(self.path):
             self.path = ob.Config.app_filepath + 'evo/' + self.filename
             # otherwise, is there a class template in evoke (base)?
             if not lexists(self.path):
                 self.path = '%s%s/evo/%s' % (ob.Config.evoke_filepath,
                                              klass, self.filename)
                 # otherwise, use the evoke template
                 if not lexists(self.path):
                     self.path = ob.Config.evoke_filepath + 'evo/' + self.filename
                     # if that doesn't exist, raise an error
                     if not lexists(self.path):
                         raise EvoTemplateNotFound(self.filename)
         self.pathcache[self.key] = self.path
     #get the CacheObject for this path, containing the python code and timestamp
     cob = self.pycache.get(self.path, EvoCacheObject())
     #and parse the template to a python expression
     # ... unless we already have python code and the timestamp matches
     if (not cob.pyc) or (cob.timestamp != stat(self.path).st_mtime):
         # set the timestamp
         cob.timestamp = stat(self.path).st_mtime
         # parse the template into python code
         cob.pyc = self.parse(self.path, ob, req)
         # compile the python code to the cache
         #      print("compiling %s : " % self.filename,cob.pyc)
         if not debug:
             try:
                 cob.pyc = compile(cob.pyc, '<string>', 'eval')
             except SyntaxError as inst:
                 p = inst.offset
                 t = inst.text
                 raise EvoSyntaxError(
                     "char %s" % p, "evo pycode for %s" % self.filename,
                     t[max(0, p - 40):p], t[p:min(p + 40, len(t) - 1)])
     # sort out the wrapper
     if wrapper is not None:
         wrapper = wrapper or req.get('wrapper', 'wrapper.evo')
     # and run the python code from the cache
     res = self.wrap(cob.pyc, ob, req,
                     wrapper) if wrapper else self.evaluate(
                         cob.pyc, ob, req)
     return res
Example #25
 def tearDown(self):
     rm_rf(self.prefix)
     if not (on_win and PY2):
         # this assertion fails for the Softlink action windows tests
         # line 141 in backoff_rmdir
         #  exp_backoff_fn(rmtree, path, onerror=retry, max_tries=max_tries)
         # leaves a directory self.prefix\\Scripts that cannot be accessed or removed
         assert not lexists(self.prefix)
     rm_rf(self.pkgs_dir)
     assert not lexists(self.pkgs_dir)
Example #26
def make_allegro_build_directory():
    os.chdir(root_path)
    os.chdir('deps')
    os.chdir('builds')
    if path.lexists('allegro_build'):
        i = 1
        while path.lexists('allegro_build ({0})'.format(i)):
            i += 1
        shutil.move('allegro_build', 'allegro_build ({0})'.format(i))
    os.mkdir('allegro_build')
Example #27
def _perform(src, dst, action, actionname):
    if not op.lexists(src):
        print("Copying %s failed: it doesn't exist." % src)
        return
    if op.lexists(dst):
        if op.isdir(dst):
            shutil.rmtree(dst)
        else:
            os.remove(dst)
    print('%s %s --> %s' % (actionname, src, dst))
    action(src, dst)
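
A plausible way such a helper gets used, with shutil operations supplying the action (calls and paths invented for illustration, not taken from the project):

import shutil

_perform('build/readme.txt', 'dist/readme.txt', shutil.copy2, 'Copying')
_perform('build/assets', 'dist/assets', shutil.copytree, 'Copying')
_perform('dist/app', 'archive/app', shutil.move, 'Moving')
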
Example #28
File: rename.py Project: bmdavll/rename
 def undoRename(self):
     if not self.renamed or not path.lexists(self.new_path):
         return False
     if path.lexists(self.path):
         if self.path not in _renamed or not _renamed[self.path].undoRename():
             return False
     try:
         os.rename(self.new_path, self.path)
         PrintError(self.arg + " not renamed", shortPath(self.new_path) + " exists")
         self.renamed = False
         return True
     except OSError as e:
         return False
Example #29
    def test_simple_LinkPathAction_directory(self):
        target_short_path = join('a', 'nested', 'directory')
        axn = LinkPathAction({}, None, None, None, self.prefix,
                             target_short_path, LinkType.directory, None)
        axn.verify()
        axn.execute()

        assert isdir(join(self.prefix, target_short_path))

        axn.reverse()
        assert not lexists(axn.target_full_path)
        assert not lexists(dirname(axn.target_full_path))
        assert not lexists(dirname(dirname(axn.target_full_path)))
Example #30
  def bringup(self, outdir, workdir):
    """ Sets up call to program. """
    from os.path import join, abspath, samefile, lexists
    from os import symlink, remove
    from ...misc import copyfile, Changedir
    from ... import CRYSTAL_propnames as filenames
    with Changedir(workdir) as cwd:
      # first copies file from current working directory
      for key, value in filenames.iteritems():
        copyfile( join(workdir, value.format('prop')), key, nocopyempty=True,
                  symlink=False, nothrow="never" )
      for key, value in filenames.iteritems():
        copyfile( join(self.input.directory, value.format('prop')), 
                  key, nocopyempty=True, symlink=False, 
                  nothrow="never" )

      # then creates input file.
      string = self.print_input(workdir=workdir, outdir=outdir, filework=True)
      string = string.rstrip() + '\n'
      with open('prop.d12', 'w') as file: file.write(string)
      header = ''.join(['#']*20)
      with open('prop.out', 'w') as file:
        file.write('{0} {1} {0}\n'.format(header, 'INPUT FILE'))
        file.write(string)
        file.write('{0} END {1} {0}\n'.format(header, 'INPUT FILE'))
        file.write('\n{0} {1} {0}\n'.format(header, 'FUNCTIONAL'))
        file.write(self.__repr__(defaults=False))
        file.write('\n{0} END {1} {0}\n'.format(header, 'FUNCTIONAL'))

    with Changedir(outdir) as cwd: pass
    if not samefile(outdir, workdir):
      # Creates symlink to make sure we keep working directory.
      with Changedir(outdir) as cwd:
        with open('prop.d12', 'w') as file: file.write(string)
        with open('prop.out', 'w') as file: pass
        with open('prop.err', 'w') as file: pass
        # creates symlink files.
        for filename in ['prop.err', 'prop.out']:
          if lexists(join(workdir, filename)):
            try: remove( join(workdir, filename) )
            except: pass
          symlink(abspath(filename), abspath(join(workdir, filename)))
            
        if lexists('workdir'): 
          try: remove('workdir')
          except: pass
        try: symlink(workdir, 'workdir')
        except: pass

    # creates a file in the directory, to say we are going to work here
    with open(join(outdir, '.pylada_is_running'), 'w') as file: pass
Example #31
 def test_add_delete(self):
     # To test that .tar gets removed
     add_archive_content('1.tar', annex=self.annex, strip_leading_dirs=True, delete=True)
     assert_false(lexists(opj(self.annex.path, '1.tar')))
Example #32
def run_command(cmd, dataset=None, inputs=None, outputs=None, expand=None,
                explicit=False, message=None, sidecar=None,
                extra_info=None,
                rerun_info=None,
                extra_inputs=None,
                rerun_outputs=None,
                inject=False):
    """Run `cmd` in `dataset` and record the results.

    `Run.__call__` is a simple wrapper over this function. Aside from backward
    compatibility kludges, the only difference is that `Run.__call__` doesn't
    expose all the parameters of this function. The unexposed parameters are
    listed below.

    Parameters
    ----------
    extra_info : dict, optional
        Additional information to dump with the json run record. Any value
        given here will take precedence over the standard run key. Warning: To
        avoid collisions with future keys added by `run`, callers should try to
        use fairly specific key names and are encouraged to nest fields under a
        top-level "namespace" key (e.g., the project or extension name).
    rerun_info : dict, optional
        Record from a previous run. This is used internally by `rerun`.
    extra_inputs : list, optional
        Inputs to use in addition to those specified by `inputs`. Unlike
        `inputs`, these will not be injected into the {inputs} format field.
    rerun_outputs : list, optional
        Outputs, in addition to those in `outputs`, determined automatically
        from a previous run. This is used internally by `rerun`.
    inject : bool, optional
        Record results as if a command was run, skipping input and output
        preparation and command execution. In this mode, the caller is
        responsible for ensuring that the state of the working tree is
        appropriate for recording the command's results.

    Yields
    ------
    Result records for the run.
    """
    if not cmd:
        lgr.warning("No command given")
        return

    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    else:
        pwd, rel_pwd = get_command_pwds(dataset)

    ds = require_dataset(
        dataset, check_installed=True,
        purpose='tracking outcomes of a command')
    ds_path = ds.path

    lgr.debug('tracking command output underneath %s', ds)

    if not (rerun_info or inject):  # Rerun already takes care of this.
        # For explicit=True, we probably want to check whether any inputs have
        # modifications. However, we can't just do is_dirty(..., path=inputs)
        # because we need to consider subdatasets and untracked files.
        # MIH: is_dirty() is gone, but status() can do all of the above!
        if not explicit and ds.repo.dirty:
            yield get_status_dict(
                'run',
                ds=ds,
                status='impossible',
                message=(
                    'clean dataset required to detect changes from command; '
                    'use `datalad status` to inspect unsaved changes'))
            return

    cmd = normalize_command(cmd)

    inputs = GlobbedPaths(inputs, pwd=pwd,
                          expand=expand in ["inputs", "both"])
    extra_inputs = GlobbedPaths(extra_inputs, pwd=pwd,
                                # Follow same expansion rules as `inputs`.
                                expand=expand in ["inputs", "both"])
    outputs = GlobbedPaths(outputs, pwd=pwd,
                           expand=expand in ["outputs", "both"])

    # ATTN: For correct path handling, all dataset commands call should be
    # unbound. They should (1) receive a string dataset argument, (2) receive
    # relative paths, and (3) happen within a chpwd(pwd) context.
    if not inject:
        with chpwd(pwd):
            for res in prepare_inputs(ds_path, inputs, extra_inputs):
                yield res

            if outputs:
                for res in _install_and_reglob(ds_path, outputs):
                    yield res
                for res in _unlock_or_remove(ds_path, outputs.expand()):
                    yield res

            if rerun_outputs is not None:
                for res in _unlock_or_remove(ds_path, rerun_outputs):
                    yield res
    else:
        # If an inject=True caller wants to override the exit code, they can do
        # so in extra_info.
        cmd_exitcode = 0
        exc = None

    try:
        cmd_expanded = format_command(
            ds, cmd,
            pwd=pwd,
            dspath=ds_path,
            # Check if the command contains "{tmpdir}" to avoid creating an
            # unnecessary temporary directory in most but not all cases.
            tmpdir=mkdtemp(prefix="datalad-run-") if "{tmpdir}" in cmd else "",
            inputs=inputs,
            outputs=outputs)
    except KeyError as exc:
        yield get_status_dict(
            'run',
            ds=ds,
            status='impossible',
            message=('command has an unrecognized placeholder: %s',
                     exc))
        return

    if not inject:
        cmd_exitcode, exc = _execute_command(
            cmd_expanded, pwd,
            expected_exit=rerun_info.get("exit", 0) if rerun_info else None)


    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'exit': cmd_exitcode,
        'chain': rerun_info["chain"] if rerun_info else [],
        'inputs': inputs.paths,
        'extra_inputs': extra_inputs.paths,
        'outputs': outputs.paths,
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd
    if ds.id:
        run_info["dsid"] = ds.id
    if extra_info:
        run_info.update(extra_info)

    record = json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False)

    if sidecar is None:
        use_sidecar = ds.config.get('datalad.run.record-sidecar', default=False)
        use_sidecar = anything2bool(use_sidecar)
    else:
        use_sidecar = sidecar


    if use_sidecar:
        # record ID is hash of record itself
        from hashlib import md5
        record_id = md5(record.encode('utf-8')).hexdigest()
        record_dir = ds.config.get('datalad.run.record-directory', default=op.join('.datalad', 'runinfo'))
        record_path = op.join(ds_path, record_dir, record_id)
        if not op.lexists(record_path):
            # go for compression, even for minimal records not much difference, despite offset cost
            # wrap in list -- there is just one record
            dump2stream([run_info], record_path, compressed=True)

    # compose commit message
    msg = u"""\
[DATALAD RUNCMD] {}

=== Do not change lines below ===
{}
^^^ Do not change lines above ^^^
"""
    msg = msg.format(
        message if message is not None else _format_cmd_shorty(cmd_expanded),
        '"{}"'.format(record_id) if use_sidecar else record)

    outputs_to_save = outputs.expand() if explicit else None
    if outputs_to_save is not None and use_sidecar:
        outputs_to_save.append(record_path)
    do_save = outputs_to_save is None or outputs_to_save
    if not rerun_info and cmd_exitcode:
        if do_save:
            repo = ds.repo
            msg_path = relpath(opj(str(repo.dot_git), "COMMIT_EDITMSG"))
            with open(msg_path, "wb") as ofh:
                ofh.write(ensure_bytes(msg))
            lgr.info("The command had a non-zero exit code. "
                     "If this is expected, you can save the changes with "
                     "'datalad save -d . -r -F %s'",
                     msg_path)
        raise exc
    elif do_save:
        with chpwd(pwd):
            for r in Save.__call__(
                    dataset=ds_path,
                    path=outputs_to_save,
                    recursive=True,
                    message=msg,
                    return_type='generator'):
                yield r
Example #33
def find_first_existing(*globs):
    for g in globs:
        for path in glob(g):
            if lexists(path):
                return path
    return None
Example #34
 def pid(self):
     pid_filename = self.pid_filename()
     if lexists(pid_filename):
         return int(''.join(open(pid_filename).readlines()))
     else:
         return None
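
Worth noting: lexists() is also true for a dangling symlink, in which case the open() above would still fail, and the file handle is never explicitly closed. A hedged variant under the same pidfile convention:

from os.path import exists

def read_pid(pid_filename):
    # exists() instead of lexists() treats a dangling symlink as "no pidfile"
    if not exists(pid_filename):
        return None
    with open(pid_filename) as f:   # context manager closes the handle
        return int(f.read().strip())
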
Example #35
    def restore(self, view, where='unknow'):
        if view is None or not view.file_name() or view.settings().get(
                'is_widget'):
            return

        if view.is_loading():
            sublime.set_timeout(lambda: self.restore(view, where), 100)
        else:

            id, index = self.view_id(view)

            if debug:
                print '-----------------------------------'
                print 'restoring from ' + where
                print view.file_name()
                print 'id ' + id
                print 'position in tabbar ' + index

            if id in db:

                # if the view changed outside of the application, don't restore folds etc
                if db[id]['id'] == long(view.size()):

                    # fold
                    rs = []
                    for r in db[id]['f']:
                        rs.append(sublime.Region(int(r[0]), int(r[1])))
                    if len(rs):
                        view.fold(rs)

                    # selection
                    if len(db[id]['s']) > 0:
                        view.sel().clear()
                        for r in db[id]['s']:
                            view.sel().add(sublime.Region(
                                int(r[0]), int(r[1])))

                    # marks
                    rs = []
                    for r in db[id]['m']:
                        rs.append(sublime.Region(int(r[0]), int(r[1])))
                    if len(rs):
                        view.add_regions("mark", rs, "mark", "dot",
                                         sublime.HIDDEN | sublime.PERSISTENT)

                    # bookmarks
                    rs = []
                    for r in db[id]['b']:
                        rs.append(sublime.Region(int(r[0]), int(r[1])))
                    if len(rs):
                        view.add_regions("bookmarks", rs, "bookmarks",
                                         "bookmark",
                                         sublime.HIDDEN | sublime.PERSISTENT)

                # color scheme
                if Pref.remember_color_scheme and 'c' in db[
                        id] and view.settings().get(
                            'color_scheme') != db[id]['c']:
                    view.settings().set('color_scheme', db[id]['c'])

                # syntax
                if view.settings().get('syntax') != db[id]['x'] and lexists(
                        sublime.packages_path() + '/../' + db[id]['x']):
                    view.settings().set('syntax', db[id]['x'])

                # scroll
                if int(sublime.version()) >= 2151:
                    if index in db[id]['l']:
                        view.set_viewport_position(tuple(db[id]['l'][index]),
                                                   False)
                    else:
                        view.set_viewport_position(tuple(db[id]['l']['0']),
                                                   False)
Example #36
File: dlad.py Project: mohebyani/heudiconv
def add_to_datalad(topdir, studydir, msg, bids):
    """Do all necessary preparations (if were not done before) and save
    """
    import datalad.api as dl
    from datalad.api import Dataset
    from datalad.support.annexrepo import AnnexRepo
    from datalad.support.external_versions import external_versions
    assert external_versions['datalad'] >= MIN_VERSION, (
        "Need datalad >= {}".format(MIN_VERSION))  # add to reqs

    studyrelpath = op.relpath(studydir, topdir)
    assert not studyrelpath.startswith(op.pardir)  # so we are under
    # now we need to test and initiate a DataLad dataset all along the path
    curdir_ = topdir
    superds = None
    subdirs = [''] + [d for d in studyrelpath.split(op.sep) if d != os.curdir]
    for isubdir, subdir in enumerate(subdirs):
        curdir_ = op.join(curdir_, subdir)
        ds = Dataset(curdir_)
        if not ds.is_installed():
            lgr.info("Initiating %s", ds)
            # would require annex > 20161018 for correct operation on annex v6
            # need to add .gitattributes first anyways
            ds_ = dl.create(
                curdir_,
                dataset=superds,
                force=True,
                # initiate annex only at the bottom repository
                no_annex=isubdir < (len(subdirs) - 1),
                fake_dates=True,
                # shared_access='all',
            )
            assert ds == ds_
        assert ds.is_installed()
        superds = ds

    # TODO: we need a helper (in DataLad ideally) to ease adding such
    # specifications
    gitattributes_path = op.join(studydir, '.gitattributes')
    # We will just make sure that all our desired rules are present in it
    desired_attrs = """\
* annex.largefiles=(largerthan=100kb)
*.json annex.largefiles=nothing
*.txt annex.largefiles=nothing
*.tsv annex.largefiles=nothing
*.nii.gz annex.largefiles=anything
*.tgz annex.largefiles=anything
*_scans.tsv annex.largefiles=anything
"""
    if op.exists(gitattributes_path):
        with open(gitattributes_path, 'rb') as f:
            known_attrs = [
                line.decode('utf-8').rstrip() for line in f.readlines()
            ]
    else:
        known_attrs = []
    for attr in desired_attrs.split('\n'):
        if attr not in known_attrs:
            known_attrs.append(attr)
    with open(gitattributes_path, 'wb') as f:
        f.write('\n'.join(known_attrs).encode('utf-8'))

    # ds might have memories of having ds.repo GitRepo
    superds = Dataset(topdir)
    assert op.realpath(ds.path) == op.realpath(studydir)
    assert isinstance(ds.repo, AnnexRepo)
    # Add doesn't have all the options of save such as msg and supers
    ds.save(path=['.gitattributes'],
            message="Custom .gitattributes",
            to_git=True)
    dsh = dsh_path = None
    if op.lexists(op.join(ds.path, '.heudiconv')):
        dsh_path = op.join(ds.path, '.heudiconv')
        dsh = Dataset(dsh_path)
        if not dsh.is_installed():
            # Previously we did not have it as a submodule, and since no
            # automagic migration is implemented, we just need to check first
            # if any path under .heudiconv is already under git control
            if any(x.startswith('.heudiconv/') for x in ds.repo.get_files()):
                lgr.warning(
                    "%s has .heudiconv not as a submodule from previous"
                    " versions of heudiconv. No automagic migration is "
                    "yet provided", ds)
            else:
                dsh = ds.create(
                    path='.heudiconv',
                    force=True,
                    # shared_access='all'
                )
        # Since .heudiconv could contain sensitive information
        # we place all files under annex and then add
        if create_file_if_missing(op.join(dsh_path, '.gitattributes'),
                                  """* annex.largefiles=anything"""):
            ds.save(
                '.heudiconv/.gitattributes',
                to_git=True,
                message="Added gitattributes to place all .heudiconv content"
                " under annex")
    ds.save('.',
            recursive=True
            # not in effect! ?
            #annex_add_opts=['--include-dotfiles']
            )

    # TODO: filter for only changed files?
    # Provide metadata for sensitive information
    mark_sensitive(ds, 'sourcedata')
    mark_sensitive(ds, '*_scans.tsv')  # top level
    mark_sensitive(ds, '*/*_scans.tsv')  # within subj
    mark_sensitive(ds, '*/*/*_scans.tsv')  # within sess/subj
    mark_sensitive(ds, '*/anat')  # within subj
    mark_sensitive(ds, '*/*/anat')  # within ses/subj
    if dsh_path:
        mark_sensitive(ds, '.heudiconv')  # entire .heudiconv!
    superds.save(path=ds.path, message=msg, recursive=True)

    assert not ds.repo.dirty
    # TODO:  they are still appearing as native annex symlinked beasts
    """
Example #37
File: version.py Project: kyleam/niceman
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Defines version to be imported in the module and obtained from setup.py
"""

from os.path import lexists, dirname, join as opj, curdir

# Hard coded version, to be done by release process
__version__ = '0.2.1'

# NOTE: might cause problems with "python setup.py develop" deployments
#  so I have even changed buildbot to use  pip install -e .
moddir = dirname(__file__)
projdir = curdir if moddir == 'reproman' else dirname(moddir)
if lexists(opj(projdir, '.git')):
    # If under git -- attempt to deduce a better "dynamic" version following git
    try:
        import sys
        from subprocess import Popen, PIPE
        from os.path import dirname
        git = Popen([
            'git', 'describe', '--abbrev=4', '--dirty', '--match', '[0-9]*\.*'
        ],
                    stdout=PIPE,
                    stderr=PIPE,
                    cwd=projdir)
        if git.wait() != 0:
            raise OSError("Could not run git describe")
        line = git.stdout.readlines()[0]
        _ = git.stderr.readlines()
Example #38
import os
from os.path import join, expanduser, lexists

CLIENT_DIR = join(expanduser("~"), ".gcal-quickeradd")
if not lexists(CLIENT_DIR):
    os.makedirs(CLIENT_DIR)

CREDENTIALS_FILE = join(CLIENT_DIR, "gcal-credentials")
CLIENT_SECRET_FILE = join(CLIENT_DIR, 'client_secret.json')
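
On Python 3 the check-then-create pair is usually collapsed with exist_ok. A small equivalent sketch (same directory name, purely illustrative); note the behavioural difference called out in the comment:

import os
from os.path import join, expanduser

CLIENT_DIR = join(expanduser("~"), ".gcal-quickeradd")
# The lexists() guard above also skips creation when CLIENT_DIR is a dangling
# symlink, whereas makedirs(..., exist_ok=True) would raise FileExistsError.
os.makedirs(CLIENT_DIR, exist_ok=True)
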
Example #39
def save_dataset(ds, paths, message=None):
    """Save changes in a single dataset.

    Parameters
    ----------
    ds : Dataset
      The dataset to be saved.
    paths : list
      Annotated paths to dataset components to be saved.
    message: str, optional
      (Commit) message to be attached to the saved state.

    Returns
    -------
    bool
      Whether a new state was saved. If all to be saved content was unmodified
      no new state will be saved.
    """
    # XXX paths must be in the given ds, no further sanity checks!

    # make sure that all pending changes (batched annex operations, etc.)
    # are actually reflected in Git
    ds.repo.precommit()

    # track what is to be committed, so it becomes
    # possible to decide when/what to save further down
    # and one level up
    orig_hexsha = ds.repo.get_hexsha()

    # check whether we want to commit anything staged, or individual pieces
    # this is independent of actually staging individual bits
    save_entire_ds = False
    for ap in paths:
        if ap['path'] == ds.path:
            save_entire_ds = True
            break

    # asking yourself why we need to `add` at all? For example, freshly
    # unlocked files in a v5 repo are listed as "typechange" and commit
    # refuses to touch them without an explicit `add`
    to_gitadd = [
        ap['path'] for ap in paths
        # if not flagged as staged
        if not ap.get('staged', False) and
        # must exist, anything else needs no staging, can be committed directly
        lexists(ap['path']) and
        # not an annex repo, hence no choice other than git
        (
            not isinstance(ds.repo, AnnexRepo) or
            # even in an annex repo we want to use `git add` for submodules
            (ap.get('type', None) == 'dataset' and not ap['path'] == ds.path))
    ]
    to_annexadd = [
        ap['path'] for ap in paths
        # not passed to git add
        if ap['path'] not in to_gitadd and
        # if not flagged as staged
        not ap.get('staged', False) and
        # prevent `git annex add .` in a subdataset, if not desired
        not ap.get('process_updated_only', False) and
        # must exist, anything else needs no staging, can be committed directly
        lexists(ap['path'])
    ]

    if to_gitadd or save_entire_ds:
        ds.repo.add(
            to_gitadd,
            git=True,
            commit=False,
            # this makes sure that pending submodule updates are added too
            update=save_entire_ds)
    if to_annexadd:
        ds.repo.add(to_annexadd, commit=False)

    _datalad_msg = False
    if not message:
        message = 'Recorded existing changes'
        _datalad_msg = True

    # we will blindly call commit not knowing if there is anything to
    # commit -- this is cheaper than to anticipate all possible ways
    # a repo in whatever mode is dirty
    paths_to_commit = None
    if not save_entire_ds:
        paths_to_commit = []
        for ap in paths:
            paths_to_commit.append(ap['path'])
            # was file renamed?
            path_src = ap.get('path_src')
            if path_src and path_src != ap['path']:
                paths_to_commit.append(path_src)

    ds.repo.commit(message,
                   files=paths_to_commit,
                   _datalad_msg=_datalad_msg,
                   careless=True)

    current_hexsha = ds.repo.get_hexsha()
    _was_modified = current_hexsha != orig_hexsha
    return current_hexsha if _was_modified else None
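# A hedged usage sketch (hypothetical helper, not from the original module):
# `paths` is a list of annotated-path dicts carrying at least a 'path' key,
# which is all the code above actually requires; every other key is optional.
import os.path as op
from datalad.api import Dataset  # assumes datalad is installed

def save_new_file_sketch(ds_path, relpath, content, message=None):
    ds = Dataset(ds_path)
    fpath = op.join(ds.path, relpath)
    with open(fpath, 'w') as f:
        f.write(content)
    # returns the new hexsha, or None if nothing changed
    return save_dataset(ds, [{'path': fpath}], message=message)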
예제 #40
0
파일: bids.py 프로젝트: sjurbeyer/heudiconv
def populate_aggregated_jsons(path):
    """Aggregate across the entire BIDS dataset .json's into top level .json's

    Top level .json files would contain only the fields which are
    common to all subject[/session]/type/*_modality.json's.

    ATM aggregating only *_task*_bold.json files. Only the task- and
    (optional) _acq- fields are retained within the aggregated filename.  The
    other BIDS _key-value pairs are "aggregated over".

    Parameters
    ----------
    path: str
      Path to the top of the BIDS dataset
    """
    # TODO: collect all task- .json files for func files to
    tasks = {}
    # way too many -- let's just collect all which are the same!
    # FIELDS_TO_TRACK = {'RepetitionTime', 'FlipAngle', 'EchoTime',
    #                    'Manufacturer', 'SliceTiming', ''}
    for fpath in find_files('.*_task-.*\_bold\.json',
                            topdir=path,
                            exclude_vcs=True,
                            exclude="/\.(datalad|heudiconv)/"):
        #
        # According to BIDS spec I think both _task AND _acq (may be more?
        # _rec, _dir, ...?) should be retained?
        # TODO: if we are to fix it, then old ones (without _acq) should be
        # removed first
        task = re.sub('.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath)
        json_ = load_json(fpath)
        if task not in tasks:
            tasks[task] = json_
        else:
            rec = tasks[task]
            # let's retain only those fields which have the same value
            for field in sorted(rec):
                if field not in json_ or json_[field] != rec[field]:
                    del rec[field]
        # create a stub onsets file for each one of those
        suf = '_bold.json'
        assert fpath.endswith(suf)
        # specify the name of the '_events.tsv' file:
        if '_echo-' in fpath:
            # multi-echo sequence: bids (1.1.0) specifies just one '_events.tsv'
            #   file, common for all echoes.  The name will not include _echo-.
            # TODO: RF to use re.match for better readability/robustness
            # So, find out the echo number:
            fpath_split = fpath.split('_echo-',
                                      1)  # split fpath using '_echo-'
            fpath_split_2 = fpath_split[1].split(
                '_', 1)  # split the second part of fpath_split using '_'
            echoNo = fpath_split_2[0]  # get echo number
            if echoNo == '1':
                if len(fpath_split_2) != 2:
                    raise ValueError("Found no trailer after _echo-")
                # we modify fpath to exclude '_echo-' + echoNo:
                fpath = fpath_split[0] + '_' + fpath_split_2[1]
            else:
                # for echoNo greater than 1, don't create the events file, so go to
                #   the next for loop iteration:
                continue

        events_file = fpath[:-len(suf)] + '_events.tsv'
        # do not touch any existing thing, it may be precious
        if not op.lexists(events_file):
            lgr.debug("Generating %s", events_file)
            with open(events_file, 'w') as f:
                f.write("onset\tduration\ttrial_type\tresponse_time\tstim_file"
                        "\tTODO -- fill in rows and add more tab-separated "
                        "columns if desired")
    # extract tasks files stubs
    for task_acq, fields in tasks.items():
        task_file = op.join(path, task_acq + '_bold.json')
        # Since we are pulling all unique fields we have to possibly
        # rewrite this file to guarantee consistency.
        # See https://github.com/nipy/heudiconv/issues/277 for a usecase/bug
        # when we didn't touch existing one.
        # But the fields we enter (TaskName and CogAtlasID) might need
        # to be populated from the file if it already exists
        placeholders = {
            "TaskName": ("TODO: full task name for %s" %
                         task_acq.split('_')[0].split('-')[1]),
            "CogAtlasID":
            "TODO",
        }
        if op.lexists(task_file):
            j = load_json(task_file)
            # Retain possibly modified placeholder fields
            for f in placeholders:
                if f in j:
                    placeholders[f] = j[f]
            act = "Regenerating"
        else:
            act = "Generating"
        lgr.debug("%s %s", act, task_file)
        fields.update(placeholders)
        save_json(task_file, fields, sort_keys=True, pretty=True)
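# A small self-contained illustration (an assumption, not part of heudiconv)
# of the aggregation rule used above: keep only the fields whose values are
# identical across all per-run .json files for a given task.
def intersect_common_fields(jsons):
    """Return the key/value pairs shared by every dict in `jsons`."""
    if not jsons:
        return {}
    rec = dict(jsons[0])
    for json_ in jsons[1:]:
        for field in sorted(rec):  # sorted() copies the keys, so deletion is safe
            if field not in json_ or json_[field] != rec[field]:
                del rec[field]
    return rec

# e.g. intersect_common_fields([{'RepetitionTime': 2.0, 'EchoTime': 0.03},
#                               {'RepetitionTime': 2.0, 'EchoTime': 0.05}])
# -> {'RepetitionTime': 2.0}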
예제 #41
0
파일: eol.py 프로젝트: DubMastr/SublimeKodi
def _paths_from_path_patterns(path_patterns,
                              files=True,
                              dirs="never",
                              recursive=True,
                              includes=None,
                              excludes=None,
                              skip_dupe_dirs=False,
                              follow_symlinks=False,
                              on_error=_NOT_SPECIFIED):
    """_paths_from_path_patterns([<path-patterns>, ...]) -> file paths

    Generate a list of paths (files and/or dirs) represented by the given path
    patterns.

        "path_patterns" is a list of paths optionally using the '*', '?' and
            '[seq]' glob patterns.
        "files" is boolean (default True) indicating if file paths
            should be yielded
        "dirs" is string indicating under what conditions dirs are
            yielded. It must be one of:
              never             (default) never yield dirs
              always            yield all dirs matching given patterns
              if-not-recursive  only yield dirs for invocations when
                                recursive=False
            See use cases below for more details.
        "recursive" is boolean (default True) indicating if paths should
            be recursively yielded under given dirs.
        "includes" is a list of file patterns to include in recursive
            searches.
        "excludes" is a list of file and dir patterns to exclude.
            (Note: This is slightly different than GNU grep's --exclude
            option which only excludes *files*.  I.e. you cannot exclude
            a ".svn" dir.)
        "skip_dupe_dirs" can be set True to watch for and skip
            descending into a dir that has already been yielded. Note
            that this currently does not dereference symlinks.
        "follow_symlinks" is a boolean indicating whether to follow
            symlinks (default False). To guard against infinite loops
            with circular dir symlinks, only dir symlinks to *deeper*
            dirs are followed.
        "on_error" is an error callback called when a given path pattern
            matches nothing:
                on_error(PATH_PATTERN)
            If not specified, the default is look for a "log" global and
            call:
                log.error("`%s': No such file or directory")
            Specify None to do nothing.

    Typically this is useful for a command-line tool that takes a list
    of paths as arguments. (For Unix-heads: the shell on Windows does
    NOT expand glob chars, that is left to the app.)

    Use case #1: like `grep -r`
      {files=True, dirs='never', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield nothing
        script PATH*    # yield all files matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files (not dirs) recursively under DIR
        script -r PATH* # yield files matching PATH* and files recursively
                        # under dirs matching PATH*; if none, call
                        # on_error(PATH*) callback

    Use case #2: like `file -r` (if it had a recursive option)
      {files=True, dirs='if-not-recursive', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield DIR, else call on_error(DIR)
        script PATH*    # yield all files and dirs matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files (not dirs) recursively under DIR
        script -r PATH* # yield files matching PATH* and files recursively
                        # under dirs matching PATH*; if none, call
                        # on_error(PATH*) callback

    Use case #3: kind of like `find .`
      {files=True, dirs='always', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield DIR, else call on_error(DIR)
        script PATH*    # yield all files and dirs matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files and dirs recursively under DIR
                        # (including DIR)
        script -r PATH* # yield files and dirs matching PATH* and recursively
                        # under dirs; if none, call on_error(PATH*)
                        # callback
    """
    from os.path import exists, isdir, join, normpath, abspath, lexists, islink, realpath
    from glob import glob

    assert not isinstance(path_patterns, _BASESTRING), \
        "'path_patterns' must be a sequence, not a string: %r" % path_patterns
    if includes is None:
        includes = []
    if excludes is None:
        excludes = []
    GLOB_CHARS = '*?['

    if skip_dupe_dirs:
        searched_dirs = set()

    for path_pattern in path_patterns:
        # Determine the set of paths matching this path_pattern.
        for glob_char in GLOB_CHARS:
            if glob_char in path_pattern:
                paths = glob(path_pattern)
                break
        else:
            if follow_symlinks:
                paths = exists(path_pattern) and [path_pattern] or []
            else:
                paths = lexists(path_pattern) and [path_pattern] or []
        if not paths:
            if on_error is None:
                pass
            elif on_error is _NOT_SPECIFIED:
                try:
                    log.error("`%s': No such file or directory", path_pattern)
                except (NameError, AttributeError):
                    pass
            else:
                on_error(path_pattern)

        for path in paths:
            if (follow_symlinks or not islink(path)) and isdir(path):
                if skip_dupe_dirs:
                    canon_path = normpath(abspath(path))
                    if follow_symlinks:
                        canon_path = realpath(canon_path)
                    if canon_path in searched_dirs:
                        continue
                    else:
                        searched_dirs.add(canon_path)

                # 'includes' SHOULD affect whether a dir is yielded.
                if (dirs == "always" or
                    (dirs == "if-not-recursive"
                     and not recursive)) and _should_include_path(
                         path, includes, excludes):
                    yield path

                # However, if recursive, 'includes' should NOT affect
                # whether a dir is recursed into. Otherwise you could
                # not:
                #   script -r --include="*.py" DIR
                if recursive and _should_include_path(path, [], excludes):
                    for dirpath, dirnames, filenames in _walk(
                            path, follow_symlinks=follow_symlinks):
                        dir_indeces_to_remove = []
                        for i, dirname in enumerate(dirnames):
                            d = join(dirpath, dirname)
                            if skip_dupe_dirs:
                                canon_d = normpath(abspath(d))
                                if follow_symlinks:
                                    canon_d = realpath(canon_d)
                                if canon_d in searched_dirs:
                                    dir_indeces_to_remove.append(i)
                                    continue
                                else:
                                    searched_dirs.add(canon_d)
                            if dirs == "always" \
                               and _should_include_path(d, includes, excludes):
                                yield d
                            if not _should_include_path(d, [], excludes):
                                dir_indeces_to_remove.append(i)
                        for i in reversed(dir_indeces_to_remove):
                            del dirnames[i]
                        if files:
                            for filename in sorted(filenames):
                                f = join(dirpath, filename)
                                if _should_include_path(f, includes, excludes):
                                    yield f

            elif files and _should_include_path(path, includes, excludes):
                yield path
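# A hedged usage sketch (assumes the generator above and its
# _should_include_path/_walk companions are importable from the same module):
# the docstring's use cases map onto the `files`/`dirs`/`recursive` arguments
# roughly like this.
def grep_like_paths(patterns, recursive=False):
    # use case #1: like `grep -r` -- yield files only
    return list(_paths_from_path_patterns(
        patterns, files=True, dirs="never", recursive=recursive))

def find_like_paths(patterns):
    # use case #3: kind of like `find .` -- yield files and dirs recursively
    return list(_paths_from_path_patterns(
        patterns, files=True, dirs="always", recursive=True,
        excludes=[".svn", ".git"]))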
예제 #42
0
def compress_dicoms(dicom_list, out_prefix, tempdirs, overwrite):
    """Archives DICOMs into a tarball

    Also tries to do it reproducibly, so takes the date for files
    and target tarball based on the series time (within the first file)

    Parameters
    ----------
    dicom_list : list of str
      list of dicom files
    out_prefix : str
      output path prefix, including the portion of the output file name
      before .dicom.tgz suffix
    tempdirs : object
      TempDirs object to handle multiple tmpdirs
    overwrite : bool
      Overwrite existing tarfiles

    Returns
    -------
    filename : str
      Result tarball
    """

    tmpdir = tempdirs(prefix='dicomtar')
    outtar = out_prefix + '.dicom.tgz'

    if op.exists(outtar) and not overwrite:
        lgr.info("File {} already exists, will not overwrite".format(outtar))
        return
    # tarfile encodes current time.time inside making those non-reproducible
    # so we should choose which date to use.
    # Solution from DataLad although ugly enough:

    dicom_list = sorted(dicom_list)
    dcm_time = get_dicom_series_time(dicom_list)

    def _assign_dicom_time(ti):
        # Reset the timestamp to the DICOM series time rather than the
        # filesystem mtime, so the resulting tarball is reproducible
        ti.mtime = dcm_time
        return ti

    # poor man's mocking since we can't rely on having mock available
    try:
        import time
        _old_time = time.time
        time.time = lambda: dcm_time
        if op.lexists(outtar):
            os.unlink(outtar)
        with tarfile.open(outtar, 'w:gz', dereference=True) as tar:
            for filename in dicom_list:
                outfile = op.join(tmpdir, op.basename(filename))
                if not op.islink(outfile):
                    os.symlink(op.realpath(filename), outfile)
                # place into archive stripping any lead directories and
                # adding the one corresponding to prefix
                tar.add(outfile,
                        arcname=op.join(op.basename(out_prefix),
                                        op.basename(outfile)),
                        recursive=False,
                        filter=_assign_dicom_time)
    finally:
        time.time = _old_time
        tempdirs.rmtree(tmpdir)

    return outtar
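# A hedged usage sketch: `_TempDirsStub` is a hypothetical minimal stand-in
# for heudiconv's TempDirs object, implementing only the two behaviours the
# function above relies on (being callable to create a tmpdir, and rmtree()).
# It assumes compress_dicoms lives in its usual module context (op, os,
# tarfile, lgr, get_dicom_series_time).
import shutil
import tempfile

class _TempDirsStub(object):
    def __call__(self, prefix=None):
        return tempfile.mkdtemp(prefix=prefix)

    def rmtree(self, path):
        shutil.rmtree(path)

# dicom_files = sorted(glob('sourcedata/sub-01/*.dcm'))   # hypothetical input
# compress_dicoms(dicom_files, 'sourcedata/sub-01/sub-01_ses-01',
#                 tempdirs=_TempDirsStub(), overwrite=False)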
예제 #43
0
def populate_bids_templates(path, defaults={}):
    """Premake BIDS text files with templates"""

    lgr.info("Populating template files under %s", path)
    descriptor = op.join(path, 'dataset_description.json')
    if not op.lexists(descriptor):
        save_json(
            descriptor,
            OrderedDict([
                ('Name', "TODO: name of the dataset"),
                ('BIDSVersion', "1.0.1"),
                ('License',
                 defaults.get(
                     'License', "TODO: choose a license, e.g. PDDL "
                     "(http://opendatacommons.org/licenses/pddl/)")),
                ('Authors',
                 defaults.get(
                     'Authors',
                     ["TODO:", "First1 Last1", "First2 Last2", "..."])),
                ('Acknowledgements',
                 defaults.get('Acknowledgements',
                              'TODO: whom you want to acknowledge')),
                ('HowToAcknowledge',
                 "TODO: describe how to acknowledge -- either cite a "
                 "corresponding paper, or just in acknowledgement "
                 "section"), ('Funding', ["TODO", "GRANT #1", "GRANT #2"]),
                ('ReferencesAndLinks', ["TODO", "List of papers or websites"]),
                ('DatasetDOI', 'TODO: eventually a DOI for the dataset')
            ]))
    sourcedata_README = op.join(path, 'sourcedata', 'README')
    if op.exists(op.dirname(sourcedata_README)):
        create_file_if_missing(sourcedata_README, (
            "TODO: Provide description about source data, e.g. \n"
            "Directory below contains DICOMS compressed into tarballs per "
            "each sequence, replicating directory hierarchy of the BIDS dataset"
            " itself."))
    create_file_if_missing(
        op.join(path, 'CHANGES'), "0.0.1  Initial data acquired\n"
        "TODOs:\n\t- verify and possibly extend information in participants.tsv"
        " (see for example http://datasets.datalad.org/?dir=/openfmri/ds000208)"
        "\n\t- fill out dataset_description.json, README, sourcedata/README"
        " (if present)\n\t- provide _events.tsv file for each _bold.nii.gz with"
        " onsets of events (see  '8.5 Task events'  of BIDS specification)")
    create_file_if_missing(
        op.join(path, 'README'),
        "TODO: Provide description for the dataset -- basic details about the "
        "study, possibly pointing to pre-registration (if public or embargoed)"
    )

    # TODO: collect all task- .json files for func files to
    tasks = {}
    # way too many -- let's just collect all which are the same!
    # FIELDS_TO_TRACK = {'RepetitionTime', 'FlipAngle', 'EchoTime',
    #                    'Manufacturer', 'SliceTiming', ''}
    for fpath in find_files('.*_task-.*\_bold\.json',
                            topdir=path,
                            exclude_vcs=True,
                            exclude="/\.(datalad|heudiconv)/"):
        task = re.sub('.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath)
        json_ = load_json(fpath)
        if task not in tasks:
            tasks[task] = json_
        else:
            rec = tasks[task]
            # let's retain only those fields which have the same value
            for field in sorted(rec):
                if field not in json_ or json_[field] != rec[field]:
                    del rec[field]
        # create a stub onsets file for each one of those
        suf = '_bold.json'
        assert fpath.endswith(suf)
        events_file = fpath[:-len(suf)] + '_events.tsv'
        # do not touch any existing thing, it may be precious
        if not op.lexists(events_file):
            lgr.debug("Generating %s", events_file)
            with open(events_file, 'w') as f:
                f.write(
                    "onset\tduration\ttrial_type\tresponse_time\tstim_file\tTODO -- fill in rows and add more tab-separated columns if desired"
                )
    # extract tasks files stubs
    for task_acq, fields in tasks.items():
        task_file = op.join(path, task_acq + '_bold.json')
        # do not touch any existing thing, it may be precious
        if not op.lexists(task_file):
            lgr.debug("Generating %s", task_file)
            fields["TaskName"] = ("TODO: full task name for %s" %
                                  task_acq.split('_')[0].split('-')[1])
            fields["CogAtlasID"] = "TODO"
            with open(task_file, 'w') as f:
                f.write(json_dumps_pretty(fields, indent=2, sort_keys=True))
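# The snippets above rely on a `create_file_if_missing` helper that is not
# shown on this page.  A minimal sketch consistent with how it is used here
# (write `content` only if the file does not exist yet, and report whether
# anything was created) could look like this -- an assumption, not the
# project's actual implementation:
import os.path as op

def create_file_if_missing(filename, content):
    """Create `filename` with `content` unless it already exists.

    Returns True if the file was created, False if it was already there.
    """
    if op.lexists(filename):
        return False
    with open(filename, 'w') as f:
        f.write(content)
    return True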
예제 #44
0
def copyfile(originalfile,
             newfile,
             copy=False,
             create_new=False,
             hashmethod=None,
             use_hardlink=False,
             copy_related_files=True):
    """Copy or link ``originalfile`` to ``newfile``.

    If ``use_hardlink`` is True, and the file can be hard-linked, then a
    link is created, instead of copying the file.

    If a hard link is not created and ``copy`` is False, then a symbolic
    link is created.

    Parameters
    ----------
    originalfile : str
        full path to original file
    newfile : str
        full path to new file
    copy : Bool
        specifies whether to copy or symlink files
        (default=False) but only for POSIX systems
    use_hardlink : Bool
        specifies whether to hard-link files, when able
        (Default=False), taking precedence over copy
    copy_related_files : Bool
        specifies whether to also operate on related files, as defined in
        ``related_filetype_sets``

    Returns
    -------
    newfile : str
        full path of the destination file (possibly renamed if ``create_new``
        is set)

    """
    newhash = None
    orighash = None
    fmlogger.debug(newfile)

    if create_new:
        while op.exists(newfile):
            base, fname, ext = split_filename(newfile)
            s = re.search('_c[0-9]{4,4}$', fname)
            i = 0
            if s:
                i = int(s.group()[2:]) + 1
                fname = fname[:-6] + "_c%04d" % i
            else:
                fname += "_c%04d" % i
            newfile = base + os.sep + fname + ext

    if hashmethod is None:
        hashmethod = config.get('execution', 'hash_method').lower()

    # Don't try creating symlinks on CIFS
    if copy is False and on_cifs(newfile):
        copy = True

    # Existing file
    # -------------
    # Options:
    #   symlink
    #       to regular file originalfile            (keep if symlinking)
    #       to same dest as symlink originalfile    (keep if symlinking)
    #       to other file                           (unlink)
    #   regular file
    #       hard link to originalfile               (keep)
    #       copy of file (same hash)                (keep)
    #       different file (diff hash)              (unlink)
    keep = False
    if op.lexists(newfile):
        if op.islink(newfile):
            if all((os.readlink(newfile) == op.realpath(originalfile),
                    not use_hardlink, not copy)):
                keep = True
        elif posixpath.samefile(newfile, originalfile):
            keep = True
        else:
            if hashmethod == 'timestamp':
                hashfn = hash_timestamp
            elif hashmethod == 'content':
                hashfn = hash_infile
            else:
                raise AttributeError("Unknown hash method found:", hashmethod)
            newhash = hashfn(newfile)
            fmlogger.debug('File: %s already exists,%s, copy:%d', newfile,
                           newhash, copy)
            orighash = hashfn(originalfile)
            keep = newhash == orighash
        if keep:
            fmlogger.debug('File: %s already exists, not overwriting, copy:%d',
                           newfile, copy)
        else:
            os.unlink(newfile)

    # New file
    # --------
    # use_hardlink & can_hardlink => hardlink
    # ~hardlink & ~copy & can_symlink => symlink
    # ~hardlink & ~symlink => copy
    if not keep and use_hardlink:
        try:
            fmlogger.debug('Linking File: %s->%s', newfile, originalfile)
            # Use realpath to avoid hardlinking symlinks
            os.link(op.realpath(originalfile), newfile)
        except OSError:
            use_hardlink = False  # Disable hardlink for associated files
        else:
            keep = True

    if not keep and not copy and os.name == 'posix':
        try:
            fmlogger.debug('Symlinking File: %s->%s', newfile, originalfile)
            os.symlink(originalfile, newfile)
        except OSError:
            copy = True  # Disable symlink for associated files
        else:
            keep = True

    if not keep:
        try:
            fmlogger.debug('Copying File: %s->%s', newfile, originalfile)
            shutil.copyfile(originalfile, newfile)
        except shutil.Error as e:
            fmlogger.warn(e.message)

    # Associated files
    if copy_related_files:
        related_file_pairs = (get_related_files(f, include_this_file=False)
                              for f in (originalfile, newfile))
        for alt_ofile, alt_nfile in zip(*related_file_pairs):
            if op.exists(alt_ofile):
                copyfile(alt_ofile,
                         alt_nfile,
                         copy,
                         hashmethod=hashmethod,
                         use_hardlink=use_hardlink,
                         copy_related_files=False)

    return newfile
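# A hedged usage sketch for the helper above, using throwaway files so it can
# actually run; it assumes the surrounding nipype module context (config,
# fmlogger, on_cifs, get_related_files, hash helpers) is importable.
import os.path as op
import tempfile

def _copyfile_demo():
    src_dir, dst_dir = tempfile.mkdtemp(), tempfile.mkdtemp()
    src = op.join(src_dir, 'data.txt')
    with open(src, 'w') as f:
        f.write('hello')
    # default: symlink on POSIX (falls back to copying elsewhere / on CIFS)
    copyfile(src, op.join(dst_dir, 'link.txt'))
    # prefer a hard link when the filesystem allows it
    copyfile(src, op.join(dst_dir, 'hard.txt'), use_hardlink=True)
    # force a real copy and never clobber an existing destination name
    return copyfile(src, op.join(dst_dir, 'copy.txt'), copy=True, create_new=True)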
예제 #45
0
import sublime, sublime_plugin
from os.path import lexists, normpath
from hashlib import sha1
from gzip import GzipFile
import thread
from cPickle import load, dump
import time

debug = False

# open

db = {}

database = sublime.packages_path() + '/User/BufferScroll.bin.gz'
if lexists(database):
    try:
        gz = GzipFile(database, 'rb')
        db = load(gz)
        gz.close()
    except:
        db = {}
else:

    # upgrade
    from os import remove, rename

    # from version 6 to 7
    if lexists(sublime.packages_path() + '/User/BufferScroll.bin'):
        try:
            db = load(
예제 #46
0
파일: auto.py 프로젝트: overlake333/datalad
    def _proxy_exists(self, path):
        # TODO: decide whether it should maybe be retrieved right away.
        # For now, as long as it is a symlink pointing to under .git/annex
        if exists(path):
            return True
        return lexists(path) and 'annex/objects' in str(realpath(path))
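# The same check written as a standalone helper (a sketch using only plain
# os.path functions): a not-yet-fetched annexed file shows up as a symlink
# (lexists is True) whose target under .git/annex/objects is absent
# (exists is False).
from os.path import exists, lexists, realpath

def looks_like_unfetched_annex_file(path):
    return (not exists(path)
            and lexists(path)
            and 'annex/objects' in str(realpath(path)))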
예제 #47
0
    def __call__(path=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 action=None,
                 unavailable_path_status='',
                 unavailable_path_msg=None,
                 nondataset_path_status='error',
                 force_parentds_discovery=True,
                 force_subds_discovery=True,
                 force_no_revision_change_discovery=True,
                 force_untracked_discovery=True,
                 modified=None):
        # upfront check for the fastest possible response
        if not path and dataset is None:
            # nothing given, try "here", but do not use `require_dataset`, as
            # it will determine the root dataset of `curdir` and further down
            # lead to path annotation of upstairs directories
            dataset = curdir

        if force_subds_discovery and not force_parentds_discovery:
            raise ValueError(
                'subdataset discovery requires parent dataset discovery')

        # CONCEPT: yield with no status to indicate further processing

        # everything in one big loop to be able to yield as fast as possible
        # without any precomputing for all paths
        refds_path = Interface.get_refds_path(dataset)
        if modified is not None and (refds_path is None
                                     or not GitRepo.is_valid_repo(refds_path)):
            raise ValueError(
                "modification detection only works with a base dataset (non-given or found)"
            )

        # prep common result props
        res_kwargs = dict(action=action if action else 'annotate_path',
                          refds=refds_path,
                          logger=lgr)

        # handle the case of recursion into a single dataset without any
        # extra fancy processing first -- full recursion can be done
        # faster than manual recursion, hence we gain quite some speed
        # from these few lines of extra code
        if not modified and not path and refds_path:
            if not GitRepo.is_valid_repo(refds_path):
                yield get_status_dict(
                    # doesn't matter if the path is in another dataset
                    # it was given as reference dataset
                    status=nondataset_path_status,
                    message='given reference dataset is not a dataset',
                    path=refds_path,
                    **res_kwargs)
                return

            refds = Dataset(refds_path)
            path = []
            # yield the dataset itself
            r = get_status_dict(ds=refds, status='', **res_kwargs)
            yield r

            if recursive:
                # if we have nothing given, but need recursion, we need to feed
                # the dataset path itself
                for r in yield_recursive(refds, refds_path, action,
                                         recursion_limit):
                    r.update(res_kwargs)
                    if 'refds' in r and not r['refds']:
                        # avoid cruft
                        del r['refds']
                    yield r
            return

        # goal: structure in a way that makes most information on any path
        # available in a single pass, at the cheapest possible cost
        reported_paths = {}
        requested_paths = ensure_list(path)

        if modified is not None:
            # modification detection would silently kill all nondataset paths
            # but we have to complain about them, hence doing it here
            if requested_paths and refds_path:
                for r in requested_paths:
                    p = r['path'] if isinstance(r, dict) else r
                    p = _resolve_path(p, ds=refds_path)
                    if path_startswith(p, refds_path):
                        # all good
                        continue
                    # not the refds
                    path_props = r if isinstance(r, dict) else {}
                    res = get_status_dict(**dict(res_kwargs, **path_props))
                    res['status'] = nondataset_path_status
                    res['message'] = 'path not associated with reference dataset'
                    reported_paths[r] = res
                    yield res

            # preserve non-existing paths to be silently killed by modification
            # detection and append them to requested_paths again after detection.
            # TODO: This might be melted in with treatment of non dataset paths
            # above. Re-appending those paths seems to be better than yielding
            # directly to avoid code duplication, since both cases later on are
            # dealt with again.
            preserved_paths = [
                r for r in requested_paths
                if not lexists(r['path'] if isinstance(r, dict) else r)
            ]

            # replace the requested paths by those paths that were actually
            # modified underneath or at a requested location
            requested_paths = get_modified_subpaths(
                # either the request, or the base dataset, if there was no request
                requested_paths if requested_paths else [refds_path],
                refds=Dataset(refds_path),
                revision=modified,
                report_no_revision_change=force_no_revision_change_discovery,
                report_untracked='all' if force_untracked_discovery else 'no',
                recursion_limit=recursion_limit)

            from itertools import chain
            # re-append the preserved paths:
            requested_paths = chain(requested_paths, iter(preserved_paths))

        # Possibly to be used "cache" of known subdatasets per each parent
        # to avoid re-querying subdatasets per each path.  The assumption here
        # is that the list of sub-datasets for a given parent should not change
        # through the execution of this loop, which (hypothetically) could be
        # incorrect while annotating paths for some commands.
        # TODO: verify this assumption and possibly add an argument to turn
        #  caching off if/when needed, or provide some other way to invalidate
        #  it
        subdss_cache = {}

        # do not loop over unique(), this could be a list of dicts
        # we avoid duplicates manually below via `reported_paths`
        for path in requested_paths:
            if not isinstance(path, dict):
                path = rawpath2ap(path, refds_path)
            # this is now an annotated path!
            path_props = path
            path = path['path']
            # we need to mark our territory, who knows where this has been
            path_props.update(res_kwargs)

            if path in reported_paths:
                # we already recorded this path in the output
                # this can happen, whenever `path` is a subdataset, that was
                # discovered via recursive processing of another path before
                continue
            # the path exists in some shape or form
            # TODO if we have path_props already we could skip this test
            if isdir(path):
                # keep any existing type info, previously a more expensive run
                # could have discovered an uninstalled 'dataset', and we don't
                # want it to be relabeled to a directory
                path_props['type'] = \
                    path_props.get(
                        'type',
                        'dataset' if not islink(path) and GitRepo.is_valid_repo(path) else 'directory')
                # this could contain all types of additional content
                containing_dir = path if not islink(path) else normpath(
                    opj(path, pardir))
            else:
                if lexists(path):
                    path_props['type'] = 'file'
                else:
                    path_props['state'] = 'absent'
                # for everything else we are interested in the container
                containing_dir = dirname(path)
                if not containing_dir:
                    containing_dir = curdir

            dspath = parent = get_dataset_root(containing_dir)
            if dspath:
                if path_props.get('type', None) == 'dataset':
                    # for a dataset the root is not the parent, for anything else
                    # it is
                    parent = path_props.get('parentds', None)
                    oneupdir = normpath(opj(containing_dir, pardir))
                    if parent is None and (force_parentds_discovery or
                                           (refds_path
                                            and _with_sep(oneupdir).startswith(
                                                _with_sep(refds_path)))):
                        # either forced, or only if we have a reference dataset, and
                        # only if we stay within this refds when searching for the
                        # parent
                        parent = get_dataset_root(
                            normpath(opj(containing_dir, pardir)))
                        # NOTE the `and refds_path` is critical, as it will determine
                        # whether a top-level dataset that was discovered gets the
                        # parent property or not, it won't get it without a common
                        # base dataset, and that is how we always rolled
                    if parent and refds_path:
                        path_props['parentds'] = parent
                        # don't check whether this is actually a true subdataset of the
                        # parent, done further down
                else:
                    # set parent, but prefer existing property
                    path_props['parentds'] = path_props.get('parentds', dspath)

            # test for `dspath` not `parent`, we only need to know whether there is
            # ANY dataset, not which one is the true parent, logic below relies on
            # the fact that we end here, if there is no dataset at all
            if not dspath:
                # not in any dataset
                res = get_status_dict(**dict(res_kwargs, **path_props))
                res['status'] = nondataset_path_status
                res['message'] = 'path not associated with any dataset'
                reported_paths[path] = res
                yield res
                continue

            # check that we only got SUBdatasets
            if refds_path and not path_startswith(dspath, refds_path):
                res = get_status_dict(**dict(res_kwargs, **path_props))
                res['status'] = nondataset_path_status
                res['message'] = \
                    ('path not part of the reference dataset at %s', refds_path)
                reported_paths[path] = res
                yield res
                continue

            if path_props.get('type', None) == 'file':
                # nothing else we can learn about this
                res = get_status_dict(**dict(res_kwargs, **path_props))
                if 'status' not in res:
                    res['status'] = ''
                reported_paths[path] = res
                yield res
                continue

            containing_ds = None
            path_type = path_props.get('type', None)
            if parent and force_subds_discovery and (
                (path_type == 'dataset'
                 and 'registered_subds' not in path_props)
                    or path_type == 'directory' or not lexists(path)):
                # if the path doesn't exist, is labeled a directory, or is a
                # dataset lacking this info -> record whether this is a known
                # subdataset of its parent
                containing_ds = Dataset(parent)
                # Possibly "cache" the list of known subdss for parents we
                # have encountered so far
                if parent in subdss_cache:
                    subdss = subdss_cache[parent]
                else:
                    subdss = containing_ds.subdatasets(fulfilled=None,
                                                       recursive=False,
                                                       result_xfm=None,
                                                       result_filter=None,
                                                       return_type='list')
                    subdss_cache[parent] = subdss
                if path in [s['path'] for s in subdss]:
                    if path_type == 'directory' or not lexists(path):
                        # first record that it isn't here, if just a dir or not here at all
                        path_props['state'] = 'absent'
                    # this must be a directory, and it is not installed
                    path_props['type'] = 'dataset'
                    path_props['registered_subds'] = True

            if not lexists(path) or \
                    (path_props.get('type', None) == 'dataset' and
                     path_props.get('state', None) == 'absent'):
                # not there (yet)
                message = unavailable_path_msg if unavailable_path_msg else None
                if message and '%s' in message:
                    message = (message, path)
                path_props['message'] = message
                res = get_status_dict(**dict(res_kwargs, **path_props))
                # assign given status, but only if the props don't indicate a status
                # already
                res['status'] = path_props.get('status',
                                               unavailable_path_status)
                reported_paths[path] = res
                yield res
                continue

            # we know everything we can, report
            res = get_status_dict(**dict(res_kwargs, **path_props))
            if 'status' not in res:
                res['status'] = ''
            reported_paths[path] = res
            yield res

            rec_paths = []
            if recursive:
                # here we need to consider the special case that `path` is
                # a dataset itself, if a recursion_limit is given (e.g.
                # `remove` will do that by default), we need to recurse
                # from the dataset itself, and not its parent to get things
                # right -- this will also avoid needless discovery of
                # unrelated subdatasets
                if path_props.get('type', None) == 'dataset':
                    containing_ds = Dataset(path)
                else:
                    # regular parent, we might have a dataset already
                    containing_ds = Dataset(
                        parent) if containing_ds is None else containing_ds
                for r in yield_recursive(containing_ds, path, action,
                                         recursion_limit):
                    # capture reported paths
                    r.update(res_kwargs)
                    if 'refds' in r and not r['refds']:
                        # avoid cruft
                        del r['refds']
                    reported_paths[r['path']] = r
                    if modified is not None:
                        # we cannot yield right away, maybe it wasn't modified
                        rec_paths.append(r)
                    else:
                        yield r
            if modified is not None and rec_paths:
                # replace the recursively discovered paths by those paths that
                # were actually modified underneath or at a requested location
                for r in get_modified_subpaths(
                        rec_paths,
                        refds=Dataset(refds_path),
                        revision=modified,
                        report_no_revision_change=
                        force_no_revision_change_discovery,
                        report_untracked='all'
                        if force_untracked_discovery else 'no',
                        recursion_limit=recursion_limit):
                    res = get_status_dict(**dict(r, **res_kwargs))
                    reported_paths[res['path']] = res
                    yield res
        return
예제 #48
0
파일: auto.py 프로젝트: overlake333/datalad
    def _dataset_auto_get(self, filepath):
        """Verify that filepath is under annex, and if so and not present - get it"""

        if not self._autoget:
            return
        # if filepath is not there at all (program just "checked" if it could access it
        if not lexists(filepath):
            lgr.log(2, " skipping %s since it is not there", filepath)
            return
        # deduce directory for filepath
        filedir = dirname(filepath)
        annex = None
        if self._repos_cache is not None:
            filedir_parts = filedir.split(pathsep)
            # ATM we do not expect subdatasets under .datalad, so we could take the top
            # level dataset for that
            try:
                filedir = pathsep.join(
                    filedir_parts[:filedir_parts.index(HANDLE_META_DIR)])
            except ValueError:
                # would happen if no .datalad
                pass
            try:
                annex = self._repos_cache[filedir]
            except KeyError:
                pass

        if annex is None:
            try:
                # TODO: verify logic for create -- we shouldn't 'annexify' non-annexified
                # see https://github.com/datalad/datalad/issues/204
                annex = get_repo_instance(filedir)
                lgr.log(2, "Got the repository %s id:%s containing %s", annex,
                        id(annex), filedir)
            except (RuntimeError, InvalidGitRepositoryError) as e:
                # must be not under annex etc
                return
            if self._repos_cache is not None:
                self._repos_cache[filedir] = annex
        if not isinstance(annex, AnnexRepo):
            # not an annex -- can do nothing
            lgr.log(2, " skipping %s since the repo is not annex", filepath)
            return
        # since Git/AnnexRepo functionality treats relative paths relative to the
        # top of the repository and might be outside, get a full path
        if not isabs(filepath):
            filepath = opj(getpwd(), filepath)

        # "quick" check first if under annex at all
        try:
            # might fail.  TODO: troubleshoot when it does e.g.
            # datalad/tests/test_auto.py:test_proxying_open_testrepobased
            under_annex = annex.is_under_annex(filepath, batch=True)
        except:  # MIH: really? what if MemoryError
            under_annex = None
        # either it has content
        if (under_annex or
                under_annex is None) and not annex.file_has_content(filepath):
            lgr.info("AutomagicIO: retrieving file content of %s", filepath)
            out = annex.get(filepath)
            if not out.get('success', False):
                # to assure that it is present and without trailing/leading new lines
                out['note'] = out.get('note', '').strip()
                lgr.error("Failed to retrieve %(file)s: %(note)s", out)
예제 #49
0
def copyfile(
    originalfile,
    newfile,
    copy=False,
    create_new=False,
    use_hardlink=True,
    copy_related_files=True,
):
    """
    Copy or link files.

    If ``use_hardlink`` is True, and the file can be hard-linked, then a
    link is created, instead of copying the file.

    If a hard link is not created and ``copy`` is False, then a symbolic
    link is created.

    .. admonition:: Copy options for existing files

        * symlink

            * to regular file originalfile            (keep if symlinking)
            * to same dest as symlink originalfile    (keep if symlinking)
            * to other file                           (unlink)

        * regular file

            * hard link to originalfile               (keep)
            * copy of file (same hash)                (keep)
            * different file (diff hash)              (unlink)

    .. admonition:: Copy options for new files

        * ``use_hardlink`` & ``can_hardlink`` => hardlink
        * ``~hardlink`` & ``~copy`` & ``can_symlink`` => symlink
        * ``~hardlink`` & ``~symlink`` => copy

    Parameters
    ----------
    originalfile : :obj:`str`
        full path to original file
    newfile : :obj:`str`
        full path to new file
    copy : Bool
        specifies whether to copy or symlink files
        (default=False) but only for POSIX systems
    use_hardlink : Bool
        specifies whether to hard-link files, when able
        (Default=True), taking precedence over copy
    copy_related_files : Bool
        specifies whether to also operate on related files, as defined in
        ``related_filetype_sets``

    Returns
    -------
    newfile : :obj:`str`
        full path of the destination file (possibly renamed if ``create_new``
        is set)

    """
    newhash = None
    orighash = None
    logger.debug(newfile)

    if create_new:
        while op.exists(newfile):
            base, fname, ext = split_filename(newfile)
            s = re.search("_c[0-9]{4,4}$", fname)
            i = 0
            if s:
                i = int(s.group()[2:]) + 1
                fname = fname[:-6] + "_c%04d" % i
            else:
                fname += "_c%04d" % i
            newfile = base + os.sep + fname + ext

    # Don't try creating symlinks on CIFS
    if copy is False and on_cifs(newfile):
        copy = True

    keep = False
    if op.lexists(newfile):
        if op.islink(newfile):
            if all(
                (
                    os.readlink(newfile) == op.realpath(originalfile),
                    not use_hardlink,
                    not copy,
                )
            ):
                keep = True
        elif posixpath.samefile(newfile, originalfile):
            keep = True
        else:
            newhash = hash_file(newfile)
            logger.debug("File: %s already exists,%s, copy:%d", newfile, newhash, copy)
            orighash = hash_file(originalfile)
            keep = newhash == orighash
        if keep:
            logger.debug(
                "File: %s already exists, not overwriting, copy:%d", newfile, copy
            )
        else:
            os.unlink(newfile)

    if not keep and use_hardlink:
        try:
            logger.debug("Linking File: %s->%s", newfile, originalfile)
            # Use realpath to avoid hardlinking symlinks
            os.link(op.realpath(originalfile), newfile)
        except OSError:
            use_hardlink = False  # Disable hardlink for associated files
        else:
            keep = True

    if not keep and not copy and os.name == "posix":
        try:
            logger.debug("Symlinking File: %s->%s", newfile, originalfile)
            os.symlink(originalfile, newfile)
        except OSError:
            copy = True  # Disable symlink for associated files
        else:
            keep = True

    if not keep:
        try:
            logger.debug("Copying File: %s->%s", newfile, originalfile)
            shutil.copyfile(originalfile, newfile)
        except shutil.Error as e:
            logger.warning(e.message)

    # Associated files
    if copy_related_files:
        related_file_pairs = (
            get_related_files(f, include_this_file=False)
            for f in (originalfile, newfile)
        )
        for alt_ofile, alt_nfile in zip(*related_file_pairs):
            if op.exists(alt_ofile):
                copyfile(
                    alt_ofile,
                    alt_nfile,
                    copy,
                    use_hardlink=use_hardlink,
                    copy_related_files=False,
                )

    return newfile
예제 #50
0
파일: ls.py 프로젝트: xlecours/datalad
    def __call__(loc,
                 recursive=False,
                 fast=False,
                 all_=False,
                 long_=False,
                 config_file=None,
                 list_content=False,
                 json=None):
        if json:
            from datalad.interface.ls_webui import _ls_json

        if isinstance(loc, list) and not len(loc):
            # nothing given, CWD assumed -- just like regular ls
            loc = '.'

        kw = dict(fast=fast, recursive=recursive, all_=all_, long_=long_)
        if isinstance(loc, list):
            return [
                Ls.__call__(loc_,
                            config_file=config_file,
                            list_content=list_content,
                            json=json,
                            **kw) for loc_ in loc
            ]

        # TODO: do some clever handling of kwargs as to remember what were defaults
        # and what any particular implementation actually needs, and then issuing
        # warning if some custom value/option was specified which doesn't apply to the
        # given url

        # rename to not anger Python gods who took all_ the good words
        kw['long_'] = kw.pop('long_')

        loc_type = "unknown"
        if loc.startswith('s3://'):
            return _ls_s3(loc,
                          config_file=config_file,
                          list_content=list_content,
                          **kw)
        elif lexists(loc):
            if isdir(loc):
                ds = Dataset(loc)
                if ds.is_installed():
                    return _ls_json(loc, json=json, **
                                    kw) if json else _ls_dataset(loc, **kw)
                    loc_type = False
                else:
                    loc_type = "dir"  # we know that so far for sure
                    # it might have been an uninstalled dataset within super-dataset
                    superds = ds.get_superdataset()
                    if superds:
                        try:
                            subdatasets = Ls._cached_subdatasets[superds.path]
                        except KeyError:
                            subdatasets = Ls._cached_subdatasets[superds.path] \
                                = superds.subdatasets(result_xfm='relpaths')
                        if relpath(ds.path, superds.path) in subdatasets:
                            loc_type = "not installed"
            else:
                loc_type = "file"
                # could list properties -- under annex or git, either clean/dirty
                # etc
                # repo = get_repo_instance(dirname(loc))

        if loc_type:
            #raise ValueError("ATM supporting only s3:// URLs and paths to local datasets")
            # TODO: unify all_ the output here -- _ls functions should just return something
            # to be displayed
            ui.message("{}  {}".format(
                ansi_colors.color_word(loc, ansi_colors.DATASET),
                ansi_colors.color_word(
                    loc_type, ansi_colors.RED if loc_type
                    in {'unknown', 'not installed'} else ansi_colors.BLUE)))
예제 #51
0
    def __call__(cmd=None,
                 dataset=None,
                 inputs=None,
                 outputs=None,
                 expand=None,
                 explicit=False,
                 message=None,
                 sidecar=None,
                 jobcfg='default',
                 submit=False):

        # TODO make sure a different rel_pwd is handled properly on the remote end
        pwd, rel_pwd = get_command_pwds(dataset)

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='preparing a remote command execution')

        try:
            cmd_expanded = format_command(ds,
                                          cmd,
                                          pwd=pwd,
                                          dspath=ds.path,
                                          inputs=inputs,
                                          outputs=outputs)
        except KeyError as exc:
            yield get_status_dict(
                'htcprepare',
                ds=ds,
                status='impossible',
                message=('command has an unrecognized placeholder: %s', exc))
            return

        transfer_files_list = ['pre.sh', 'post.sh']

        # where all the submission packs live
        subroot_dir = get_submissions_dir(ds)
        subroot_dir.mkdir(parents=True, exist_ok=True)

        # location of to-be-created submission
        submission_dir = ut.Path(
            tempfile.mkdtemp(prefix='submit_', dir=text_type(subroot_dir)))
        # strip the 'submit_' prefix to obtain the submission identifier
        submission = submission_dir.name[7:]

        split_cmd = shlex.split(cmd_expanded)
        # is this a singularity job?
        singularity_job = get_singularity_jobspec(split_cmd)
        if not singularity_job:
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string('datalad_htcondor',
                                    'resources/scripts/runner_direct.sh'))
            job_args = split_cmd
        else:
            # link the container into the submission dir
            (submission_dir / 'singularity.simg').symlink_to(
                ut.Path(singularity_job[0]).resolve())
            transfer_files_list.append('singularity.simg')
            # arguments of the job
            job_args = singularity_job[1]
            job_args.insert(0, 'singularity.simg')

            # TODO conditional on run_as_user=false
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string(
                        'datalad_htcondor',
                        'resources/scripts/runner_singularity_anon.sh'))
        make_executable(submission_dir / 'runner.sh')

        # htcondor wants the log dir to exist at submit time
        # TODO ATM we only support a single job per cluster submission
        (submission_dir / 'job_0' / 'logs').mkdir(parents=True)

        # TODO make job pre/post script selection configurable
        with (submission_dir / 'pre.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/pre_posix_chirp.sh'))
        make_executable(submission_dir / 'pre.sh')

        with (submission_dir / 'post.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/post_posix.sh'))
        make_executable(submission_dir / 'post.sh')

        # API support selection (bound dataset methods and such)
        # internal import to avoid circularities
        from datalad.api import (
            rev_status as status, )

        inputs = GlobbedPaths(inputs, pwd=pwd)
        prepare_inputs(ds, inputs)

        # it could be that an input expression does not expand,
        # because it doesn't match anything. In such a case
        # we need to filter out such globs to not confuse
        # the status() call below that only takes real paths
        inputs = [p for p in inputs.expand(full=True) if op.lexists(p)]
        # now figure out what matches the remaining paths in the
        # entire repo and dump a list of files to transfer
        if inputs:
            with (submission_dir / 'input_files').open('w') as f:
                # TODO disable output renderer
                for p in ds.rev_status(
                        path=inputs,
                        # TODO do we really want that True? I doubt it
                        # this might pull in the world
                        recursive=False,
                        # we would have otherwise no idea
                        untracked='no',
                        result_renderer=None):
                    f.write(text_type(p['path']))
                    f.write(u'\0')
                transfer_files_list.append('input_files')

        if outputs:
            # write the output globs to a file for eval on the execute
            # side
            # XXX we may not want to eval them on the remote side
            # at all, however. This would make things different
            # than with local execute, where we also just write to
            # a dataset and do not have an additional filter
            (submission_dir / 'output_globs').write_text(
                # we need a final trailing delimiter as a terminator
                u'\0'.join(outputs) + u'\0')
            transfer_files_list.append('output_globs')

        (submission_dir /
         'source_dataset_location').write_text(text_type(ds.pathobj) + op.sep)
        transfer_files_list.append('source_dataset_location')

        with (submission_dir / 'cluster.submit').open('w') as f:
            f.write(
                submission_template.format(
                    executable='runner.sh',
                    # TODO if singularity_job else 'job.sh',
                    transfer_files_list=','.join(
                        op.join(op.pardir, f) for f in transfer_files_list),
                    **submission_defaults))

            f.write(u'\narguments = "{}"\nqueue\n'.format(
                # TODO deal with single quotes in the args
                ' '.join("'{}'".format(a) for a in job_args)))

        # dump the run command args into a file for re-use
        # when the result is merged
        # include even args that are already evaluated and
        # acted upon, to be able to convince `run` to create
        # a full run record that maybe could be re-run
        # locally
        json_py.dump(
            dict(
                cmd=cmd,
                inputs=inputs,
                outputs=outputs,
                expand=expand,
                explicit=explicit,
                message=message,
                sidecar=sidecar,
                # report the PWD too, to give `run` a chance
                # to be correct after the fact
                pwd=pwd,
            ),
            text_type(submission_dir / 'runargs.json'))

        # we use this file to inspect what state this submission is in
        (submission_dir / 'status').write_text(u'prepared')

        yield get_status_dict(action='htc_prepare',
                              status='ok',
                              refds=text_type(ds.pathobj),
                              submission=submission,
                              path=text_type(submission_dir),
                              logger=lgr)

        if submit:
            try:
                Runner(cwd=text_type(submission_dir)).run(
                    ['condor_submit', 'cluster.submit'],
                    log_stdout=False,
                    log_stderr=False,
                    expect_stderr=True,
                    expect_fail=True,
                )
                (submission_dir / 'status').write_text(u'submitted')
                yield get_status_dict(action='htc_submit',
                                      status='ok',
                                      submission=submission,
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
            except CommandError as e:
                yield get_status_dict(action='htc_submit',
                                      status='error',
                                      submission=submission,
                                      message=('condor_submit failed: %s',
                                               exc_str(e)),
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
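
The submission pack above records its `input_files` and `output_globs` as NUL-delimited lists with a trailing terminator. A minimal, self-contained sketch of that convention (the file name and helper names below are illustrative, not part of datalad-htcondor):

import tempfile
from pathlib import Path

def write_nul_list(path, entries):
    # every entry is followed by a NUL, so the file ends with a terminator
    path.write_text(u'\0'.join(entries) + u'\0')

def read_nul_list(path):
    # split on NUL and drop the empty tail produced by the final terminator
    return [e for e in path.read_text().split(u'\0') if e]

with tempfile.TemporaryDirectory() as tmp:
    listing = Path(tmp) / 'input_files'
    write_nul_list(listing, ['data/file1.dat', 'code/run.sh'])
    assert read_nul_list(listing) == ['data/file1.dat', 'code/run.sh']
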
Example #52
    def download_file(self,
                      file_id,
                      path,
                      existing="error",
                      attrs=None,
                      digests=None):
        """
        Parameters
        ----------
        digests: dict, optional
          possible checksums or other digests provided for the file. Only one
          will be used to verify download
        """
        if op.lexists(path):
            msg = f"File {path!r} already exists"
            if existing == "error":
                raise FileExistsError(msg)
            elif existing == "skip":
                lgr.info(msg + " skipping")
                return
            elif existing == "overwrite":
                pass
            elif existing == "refresh":
                remote_file_mtime = self._get_file_mtime(attrs)
                if remote_file_mtime is None:
                    lgr.warning(
                        f"{path!r} - no mtime or ctime in the record, redownloading"
                    )
                else:
                    stat = os.stat(op.realpath(path))
                    same = []
                    if is_same_time(stat.st_mtime, remote_file_mtime):
                        same.append("mtime")
                    if "size" in attrs and stat.st_size == attrs["size"]:
                        same.append("size")
                    if same == ["mtime", "size"]:
                        # TODO: add recording and handling of .nwb object_id
                        lgr.info(f"{path!r} - same time and size, skipping")
                        return
                    lgr.debug(
                        f"{path!r} - same attributes: {same}.  Redownloading")

        destdir = op.dirname(path)
        os.makedirs(destdir, exist_ok=True)
        # suboptimal since
        # 1. downloads into TMPDIR which might lack space etc.  If anything, we
        # might tune the setting of TMPDIR at the level of the download
        # so it goes alongside the target path
        # (e.g. under .FILENAME.dandi-download). That would speed things up
        # when finalizing the download, possibly avoiding `mv` across partitions
        # 2. unlike upload it doesn't use a callback but relies on a context
        #  manager to be called with an .update.  also it uses only filename
        #  in the progressbar label
        # For starters we would do this implementation but later RF
        # when RF - do not forget to remove progressReporterCls in __init__

        # Will do 3 attempts to avoid some problems due to flaky/overloaded
        # connections, see https://github.com/dandi/dandi-cli/issues/87
        for attempt in range(3):
            try:
                self.downloadFile(file_id, path)
                break
            except gcl.HttpError as exc:
                if is_access_denied(exc) or attempt >= 2:
                    raise
                # sleep a little and retry
                lgr.debug(
                    "Failed to download on attempt#%d, will sleep a bit and retry",
                    attempt,
                )
                time.sleep(random.random() * 5)
        # It seems that the above call does not set the mtime, so set it here if known
        if attrs:
            mtime = self._get_file_mtime(attrs)
            if mtime:
                os.utime(path, (time.time(), mtime.timestamp()))
        if digests:
            # Pick the first one (ordered according to speed of computation)
            for algo in metadata_digests:
                if algo in digests:
                    break
            else:
                algo = list(digests)[0]  # first available
            digest = Digester([algo])(path)[algo]
            if digests[algo] != digest:
                lgr.warning(
                    "%s %s is different: downloaded %s, should have been %s.",
                    path,
                    algo,
                    digest,
                    digests[algo],
                )
            else:
                lgr.debug("Verified that %s has correct %s %s", path, algo,
                          digest)
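
A distilled, self-contained sketch of the `existing="refresh"` decision above: re-download only when the local mtime or size differ from the remote record. The `is_same_time` tolerance of the real code is replaced here by a plain one-second comparison, purely for illustration:

import os
import os.path as op

def needs_redownload(path, remote_mtime, remote_size, tolerance=1.0):
    """Return True if the local copy should be replaced."""
    if not op.lexists(path):
        return True
    if remote_mtime is None:
        # no usable timestamp in the record -- redownload to be safe
        return True
    stat = os.stat(op.realpath(path))
    same_time = abs(stat.st_mtime - remote_mtime) <= tolerance
    same_size = remote_size is not None and stat.st_size == remote_size
    return not (same_time and same_size)

# a freshly written file with matching metadata is kept
with open('demo.bin', 'wb') as f:
    f.write(b'abc')
st = os.stat('demo.bin')
print(needs_redownload('demo.bin', st.st_mtime, st.st_size))  # False
os.unlink('demo.bin')
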
Example #53
def _download_file(downloader,
                   path,
                   size=None,
                   mtime=None,
                   existing="error",
                   digests=None):
    """Common logic for downloading a single file

    Generator downloader:

    TODO: describe expected records it should yield
    - progress
    - error
    - completion

    Parameters
    ----------
    downloader: callable returning a generator
      A backend-specific fixture for downloading some file into path. It should
      be a generator yielding downloaded blocks.
    size: int, optional
      Target size if known
    digests: dict, optional
      possible checksums or other digests provided for the file. Only one
      will be used to verify download
    """
    if op.lexists(path):
        block = f"File {path!r} already exists"
        if existing == "error":
            raise FileExistsError(block)
        elif existing == "skip":
            yield _skip_file("already exists")
            return
        elif existing == "overwrite":
            pass
        elif existing == "refresh":
            if mtime is None:
                lgr.warning(
                    f"{path!r} - no mtime or ctime in the record, redownloading"
                )
            else:
                stat = os.stat(op.realpath(path))
                same = []
                if is_same_time(stat.st_mtime, mtime):
                    same.append("mtime")
                if size is not None and stat.st_size == size:
                    same.append("size")
                # TODO: use digests if available? or if e.g. size is identical
                # but mtime is different
                if same == ["mtime", "size"]:
                    # TODO: add recording and handling of .nwb object_id
                    yield _skip_file("same time and size")
                    return
                lgr.debug(
                    f"{path!r} - same attributes: {same}.  Redownloading")

    if size is not None:
        yield {"size": size}

    destdir = op.dirname(path)
    os.makedirs(destdir, exist_ok=True)

    yield {"status": "downloading"}

    algo, digester, digest, downloaded_digest = None, None, None, None
    if digests:
        # choose first available for now.
        # TODO: reuse that sorting based on speed
        for algo, digest in digests.items():
            if algo == "dandi-etag":
                from .core.digests.dandietag import ETagHashlike

                digester = lambda: ETagHashlike(size)  # noqa: E731
            else:
                digester = getattr(hashlib, algo, None)
            if digester:
                break
        if not digester:
            lgr.warning("Found no digests in hashlib for any of %s",
                        str(digests))

    # TODO: how do we discover the total size????
    # TODO: do not do it in-place, but rather into some "hidden" file
    for attempt in range(3):
        try:
            if digester:
                downloaded_digest = digester()  # start empty
            warned = False
            # I wonder if we could make writing async with downloader
            with DownloadDirectory(path, digests) as dldir:
                downloaded = dldir.offset
                if size is not None and downloaded == size:
                    # Exit early when downloaded == size, as making a Range
                    # request in such a case results in a 416 error from S3.
                    # Problems will result if `size` is None but we've already
                    # downloaded everything.
                    break
                for block in downloader(start_at=dldir.offset):
                    if digester:
                        downloaded_digest.update(block)
                    downloaded += len(block)
                    # TODO: yield progress etc
                    msg = {"done": downloaded}
                    if size:
                        if downloaded > size and not warned:
                            warned = True
                            # Yield ERROR?
                            lgr.warning(
                                "Downloaded %d bytes although size was told to be just %d",
                                downloaded,
                                size,
                            )
                        msg["done%"] = 100 * downloaded / size if size else "100"
                        # TODO: ETA etc
                    yield msg
                    dldir.append(block)
            break
        except requests.exceptions.HTTPError as exc:
            # TODO: actually we should probably retry only on selected codes, and also
            # respect Retry-After
            if attempt >= 2 or exc.response.status_code not in (
                    400,  # Bad Request, but happened with girder:
                    # https://github.com/dandi/dandi-cli/issues/87
                    503,  # Service Unavailable
            ):
                lgr.debug("Download failed: %s", exc)
                yield {"status": "error", "message": str(exc)}
                return
            # if is_access_denied(exc) or attempt >= 2:
            #     raise
            # sleep a little and retry
            lgr.debug(
                "Failed to download on attempt#%d: %s, will sleep a bit and retry",
                attempt,
                exc,
            )
            time.sleep(random.random() * 5)

    if downloaded_digest:
        downloaded_digest = downloaded_digest.hexdigest()  # we care only about hex
        if digest != downloaded_digest:
            msg = f"{algo}: downloaded {downloaded_digest} != {digest}"
            yield {"checksum": "differs", "status": "error", "message": msg}
            lgr.debug("%s is different: %s.", path, msg)
            return
        else:
            yield {"checksum": "ok"}
            lgr.debug("Verified that %s has correct %s %s", path, algo, digest)
    else:
        # shouldn't happen with more recent metadata etc
        yield {
            "checksum": "-",
            # "message": "no digests were provided"
        }

    # TODO: dissolve attrs and pass specific mtime?
    if mtime:
        yield {"status": "setting mtime"}
        os.utime(path, (time.time(), ensure_datetime(mtime).timestamp()))

    yield {"status": "done"}
Example #54
File: utils.py  Project: debanjum/datalad
def save_dataset(
        ds,
        paths=None,
        message=None,
        version_tag=None):
    """Save changes in a single dataset.

    Parameters
    ----------
    ds : Dataset
      The dataset to be saved.
    paths : list, optional
      Paths to dataset components to be saved.
    message: str, optional
      (Commit) message to be attached to the saved state.
    version_tag : str, optional
      Tag to be assigned to the saved state.

    Returns
    -------
    Commit or None
      The commit object of the newly saved state, or None if all content to be
      saved was unmodified and no new state was created.
    """
    # XXX paths must be in the given ds, no further sanity checks!

    # make sure that all pending changes (batched annex operations, etc.)
    # are actually reflected in Git
    ds.repo.precommit()

    # track what is to be committed, so it becomes
    # possible to decide when/what to save further down
    # and one level up
    orig_hexsha = ds.repo.get_hexsha()

    # always yields list; empty if None
    files = list(
        set(
            [opj(ds.path, f) if not isabs(f) else f for f in assure_list(paths)]))

    # try to consider existing and changed files, and prevent untracked
    # files from being added
    # XXX not acting upon untracked files would be very expensive, because
    # I see no way to avoid using `add` below and git annex has no equivalent
    # to git add's --update -- so for now don't bother
    # XXX alternatively we could consider --no-ignore-removal to also
    # have it delete any already vanished files
    # asking yourself why we need to `add` at all? For example, freshly
    # unlocked files in a v5 repo are listed as "typechange" and commit
    # refuses to touch them without an explicit `add`
    tostage = [f for f in files if lexists(f)]
    if tostage:
        lgr.debug('staging files for commit: %s', tostage)
        if isinstance(ds.repo, AnnexRepo):
            # to make this work without calling `git add` in addition,
            # this needs git-annex v6.20161210 (see #1027)
            ds.repo.add(tostage, commit=False)
        else:
            # --update will ignore any untracked files, sadly git-annex add
            # above does not
            # will complain about vanished files though, filter them here, but
            # keep them for a later commit call
            ds.repo.add(tostage, git_options=['--update'], commit=False)

    _datalad_msg = False
    if not message:
        message = 'Recorded existing changes'
        _datalad_msg = True

    if files or ds.repo.repo.is_dirty(
            index=True,
            working_tree=False,
            untracked_files=False,
            submodules=True):
        # either we have an explicit list of files, or we have something
        # staged -- otherwise do not attempt to commit, as the underlying
        # repo will happily commit any non-change
        # not checking the working tree or untracked files should make this
        # relatively cheap

        # TODO: commit() should rather report a dedicated ValueError
        # waiting for #1170
        from datalad.support.exceptions import CommandError
        try:
            # we will blindly call commit not knowing if there is anything to
            # commit -- this is cheaper than to anticipate all possible ways
            # a repo in whatever mode is dirty
            # however, if nothing is dirty the whining will start
            # --> sucking it up right here
            with swallow_logs(new_level=logging.ERROR) as cml:
                ds.repo.commit(message, options=files, _datalad_msg=_datalad_msg)
        except CommandError as e:
            # TODO until #1171 is resolved, test here for "normal" failure
            # to commit
            if 'nothing to commit' in str(e):
                lgr.debug(
                    "Was instructed to commit %s files but repository is not dirty",
                    files)
            elif 'no changes added to commit' in str(e):
                lgr.info(
                    'Nothing to save')
            else:
                # relay any prior whining in the exception
                raise ValueError('{} [error log follows] {}; {}'.format(
                    e, cml.out, cml.err))

    # MIH: let's tag even if there was nothing to commit. I'd forget this
    # option too often...
    if version_tag:
        ds.repo.tag(version_tag)

    _was_modified = ds.repo.get_hexsha() != orig_hexsha

    return ds.repo.repo.head.commit if _was_modified else None
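
A minimal, self-contained sketch of the stage-then-commit flow above, using plain `git` via subprocess instead of datalad's repo abstraction (it assumes `git` on PATH and an existing repository; this illustrates the control flow only, not datalad's API):

import os.path as op
import subprocess

def git(repo, *args):
    # thin wrapper; callers inspect stdout/stderr instead of raising
    return subprocess.run(['git', '-C', repo] + list(args),
                          capture_output=True, text=True)

def save(repo, paths, message='Recorded existing changes'):
    """Stage existing paths and commit; return True if a new commit was made."""
    before = git(repo, 'rev-parse', '--quiet', '--verify', 'HEAD').stdout.strip()
    # only stage what actually exists, like the lexists() filter above
    tostage = [p for p in paths if op.lexists(op.join(repo, p))]
    if tostage:
        git(repo, 'add', '--', *tostage)
    result = git(repo, 'commit', '-m', message)
    output = result.stdout + result.stderr
    if 'nothing to commit' in output or 'no changes added to commit' in output:
        return False
    after = git(repo, 'rev-parse', '--quiet', '--verify', 'HEAD').stdout.strip()
    return after != before
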
Example #55
File: copy_file.py  Project: ypid/datalad
def _replace_file(str_src, dest, str_dest, follow_symlinks):
    if op.lexists(str_dest):
        dest.unlink()
    else:
        dest.parent.mkdir(exist_ok=True, parents=True)
    copyfile(str_src, str_dest, follow_symlinks=follow_symlinks)
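
A short usage sketch of the helper above. It assumes the snippet sits next to the definition, i.e. that the module's own `op` (os.path) and `copyfile` (shutil.copyfile) imports are in place; the paths are made up for the example:

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    src = Path(tmp) / 'src.txt'
    src.write_text('payload')
    dest = Path(tmp) / 'sub' / 'dest.txt'
    # dest does not exist yet: the parent directory gets created first
    _replace_file(str(src), dest, str(dest), follow_symlinks=True)
    assert dest.read_text() == 'payload'
    # dest exists now: it is unlinked and then rewritten
    _replace_file(str(src), dest, str(dest), follow_symlinks=True)
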
Example #56
File: utils.py  Project: debanjum/datalad
def get_paths_by_dataset(paths, recursive=False, recursion_limit=None,
                         out=None, dir_lookup=None):
    """Sort a list of paths per dataset they are contained in.

    Any paths that are not part of a dataset, or presently unavailable are
    reported.

    Parameters
    ----------
    paths : sequence
      A sequence of path specifications to sort.
    recursive : bool
      Flag whether to report subdatasets under any of the given paths
    recursion_limit :
      Depth constraint for recursion. See `Dataset.get_subdatasets()` for more
      information.
    out : dict or None
      By default a new output dictionary is created, however an existing one
      can be provided via this argument to enable incremental processing.
    dir_lookup : dict or None
      Optional lookup cache that maps paths to previously determined datasets.
      This can speed up repeated processing.

    Returns
    -------
    Tuple(dict, list, list)
      Dict of `existing dataset path`: `path` mappings, the list of currently
      non-existing paths (possibly matching currently uninstalled datasets),
      and any paths that are not part of any dataset.
    """
    # sort paths into the respective datasets
    if dir_lookup is None:
        dir_lookup = {}
    if out is None:
        out = {}
    # paths that don't exist (yet)
    unavailable_paths = []
    nondataset_paths = []
    for path in paths:
        if not lexists(path):
            # not there yet, impossible to say which ds it will actually
            # be in, if any
            unavailable_paths.append(path)
            continue
        # the path exists in some shape or form
        if isdir(path):
            # this could contain all types of additional content
            d = path
        else:
            # for everything else we are interested in the container
            d = dirname(path)
            if not d:
                d = curdir
        # this could be `None` if there is no git repo
        dspath = dir_lookup.get(d, get_dataset_root(d))
        dir_lookup[d] = dspath
        if not dspath:
            nondataset_paths.append(path)
            continue
        if isdir(path):
            ds = Dataset(dspath)
            # we need to doublecheck that this is not a subdataset mount
            # point, in which case get_toppath() would point to the parent
            smpath = ds.get_containing_subdataset(
                path, recursion_limit=1).path
            if smpath != dspath:
                # fix entry
                dir_lookup[d] = smpath
                # submodule still needs to be obtained
                unavailable_paths.append(path)
                continue
            if recursive:
                # make sure we get everything relevant in all _checked out_
                # subdatasets, obtaining of previously unavailable subdataset
                # else done elsewhere
                subs = ds.get_subdatasets(fulfilled=True,
                                          recursive=recursive,
                                          recursion_limit=recursion_limit)
                for sub in subs:
                    subdspath = opj(dspath, sub)
                    if subdspath.startswith(_with_sep(path)):
                        # this subdatasets is underneath the search path
                        # we want it all
                        # be careful to not overwrite anything, in case
                        # this subdataset has been processed before
                        out[subdspath] = out.get(
                            subdspath,
                            [subdspath])
        out[dspath] = out.get(dspath, []) + [path]
    return out, unavailable_paths, nondataset_paths
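
A simplified, self-contained sketch of the same sorting idea: map each existing path to the root of the repository containing it (found by walking up to a `.git` entry), and report the rest as unavailable or outside any repository. It stands in for `get_dataset_root()` and deliberately ignores the subdataset and recursion handling above:

import os
import os.path as op

def find_repo_root(path):
    # walk up until a directory containing '.git' is found
    path = op.abspath(path)
    while True:
        if op.exists(op.join(path, '.git')):
            return path
        parent = op.dirname(path)
        if parent == path:
            return None
        path = parent

def sort_paths_by_repo(paths):
    by_repo, unavailable, outside = {}, [], []
    for p in paths:
        if not op.lexists(p):
            unavailable.append(p)
            continue
        d = p if op.isdir(p) else (op.dirname(p) or os.curdir)
        root = find_repo_root(d)
        if root is None:
            outside.append(p)
        else:
            by_repo.setdefault(root, []).append(p)
    return by_repo, unavailable, outside

print(sort_paths_by_repo(['.', 'no/such/path']))
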
Example #57
    def __call__(archive,
                 annex=None,
                 add_archive_leading_dir=False,
                 strip_leading_dirs=False,
                 leading_dirs_depth=None,
                 leading_dirs_consider=None,
                 use_current_dir=False,
                 delete=False,
                 key=False,
                 exclude=None,
                 rename=None,
                 existing='fail',
                 annex_options=None,
                 copy=False,
                 commit=True,
                 allow_dirty=False,
                 stats=None,
                 drop_after=False,
                 delete_after=False):
        """
        Returns
        -------
        annex
        """
        if exclude:
            exclude = assure_tuple_or_list(exclude)
        if rename:
            rename = assure_tuple_or_list(rename)

        # TODO: we could consider asking the user whether they want to convert
        # their git repo into an annex
        archive_path = archive
        pwd = getpwd()
        if annex is None:
            annex = get_repo_instance(pwd, class_=AnnexRepo)
            if not isabs(archive):
                # if not absolute -- relative to the wd, thus join with its real path
                archive_path = normpath(opj(realpath(pwd), archive))
                # abspath(archive) is not "good" since dereferences links in the path
                # archive_path = abspath(archive)
        elif not isabs(archive):
            # if we are given an annex, then assume that given path is within annex, not
            # relative to PWD
            archive_path = opj(annex.path, archive)
        annex_path = annex.path

        # _rpath below should depict paths relative to the top of the annex
        archive_rpath = relpath(archive_path, annex_path)

        # TODO: somewhat too cruel -- may be an option or smth...
        if not allow_dirty and annex.dirty:
            # already saved me once ;)
            raise RuntimeError(
                "You better commit all the changes and untracked files first")

        if not key:
            # we were given a file which must exist
            if not exists(archive_path):
                raise ValueError("Archive {} does not exist".format(archive))
            # TODO: support adding archives content from outside the annex/repo
            origin = 'archive'
            key = annex.get_file_key(archive_rpath)
            archive_dir = dirname(archive_path)
        else:
            origin = 'key'
            key = archive
            archive_dir = None  # We must not have anything to do with the location under .git/annex

        archive_basename = file_basename(archive)

        if not key:
            # TODO: allow for it to be under git???  how to reference then?
            raise NotImplementedError(
                "Provided file %s is not under annex.  We don't support yet adding everything "
                "straight to git" % archive)

        # are we in a subdirectory of the repository?
        pwd_under_annex = commonprefix([pwd, annex_path]) == annex_path
        # if so, we should add content under that subdirectory --
        # get the path relative to the repo top
        if use_current_dir:
            # if outside -- extract to the top of repo
            extract_rpath = relpath(pwd, annex_path) \
                if pwd_under_annex \
                else None
        else:
            extract_rpath = relpath(archive_dir, annex_path)

        # relpath might return '.' as the relative path to curdir, which then normalize_paths
        # would take as instructions to really go from cwd, so we need to sanitize
        if extract_rpath == curdir:
            extract_rpath = None  # no special relpath from top of the repo

        # and from now on operate on the key or wherever content is available "canonically"
        try:
            key_rpath = annex.get_contentlocation(
                key)  # , relative_to_top=True)
        except Exception:
            raise RuntimeError(
                "Content of %s seems to be N/A.  Fetch it first" % key)

        # now we simply need to go through every file in that archive and
        lgr.info("Adding content of the archive %s into annex %s", archive,
                 annex)

        from datalad.customremotes.archives import ArchiveAnnexCustomRemote
        # TODO: shouldn't we be able just to pass existing AnnexRepo instance?
        # TODO: we will use persistent cache so we could just (ab)use possibly extracted archive
        annexarchive = ArchiveAnnexCustomRemote(path=annex_path,
                                                persistent_cache=True)
        # We will move extracted content, so it must not exist prior to running
        annexarchive.cache.allow_existing = True
        earchive = annexarchive.cache[key_rpath]

        # TODO: check if may be it was already added
        if ARCHIVES_SPECIAL_REMOTE not in annex.get_remotes():
            init_datalad_remote(annex,
                                ARCHIVES_SPECIAL_REMOTE,
                                autoenable=True)
        else:
            lgr.debug("Special remote {} already exists".format(
                ARCHIVES_SPECIAL_REMOTE))

        precommitted = False
        delete_after_rpath = None
        try:
            old_always_commit = annex.always_commit
            annex.always_commit = False

            if annex_options:
                if isinstance(annex_options, string_types):
                    annex_options = shlex.split(annex_options)

            leading_dir = earchive.get_leading_directory(
                depth=leading_dirs_depth, exclude=exclude, consider=leading_dirs_consider) \
                if strip_leading_dirs else None
            leading_dir_len = len(leading_dir) + len(
                opsep) if leading_dir else 0

            # we need to create a temporary directory at the top level which would later be
            # removed
            prefix_dir = basename(tempfile.mktemp(prefix=".datalad", dir=annex_path)) \
                if delete_after \
                else None

            # dedicated stats which would be added to passed in (if any)
            outside_stats = stats
            stats = ActivityStats()

            for extracted_file in earchive.get_extracted_files():
                stats.files += 1
                extracted_path = opj(earchive.path, extracted_file)

                if islink(extracted_path):
                    link_path = realpath(extracted_path)
                    if not exists(
                            link_path
                    ):  # TODO: config  addarchive.symlink-broken='skip'
                        lgr.warning("Path %s points to non-existing file %s" %
                                    (extracted_path, link_path))
                        stats.skipped += 1
                        continue
                        # TODO: check if points outside of the archive -- warning and skip

                # preliminary target name which might get modified by renames
                target_file_orig = target_file = extracted_file

                # strip leading dirs
                target_file = target_file[leading_dir_len:]

                if add_archive_leading_dir:
                    target_file = opj(archive_basename, target_file)

                if rename:
                    target_file = apply_replacement_rules(rename, target_file)

                # continue to the next iteration if extracted_file is excluded
                if exclude:
                    try:  # since we need to skip outside loop from inside loop
                        for regexp in exclude:
                            if re.search(regexp, extracted_file):
                                lgr.debug(
                                    "Skipping {extracted_file} since contains {regexp} pattern"
                                    .format(**locals()))
                                stats.skipped += 1
                                raise StopIteration
                    except StopIteration:
                        continue

                if prefix_dir:
                    target_file = opj(prefix_dir, target_file)
                    # but also allow for it in the orig
                    target_file_orig = opj(prefix_dir, target_file_orig)

                target_file_path_orig = opj(annex.path, target_file_orig)

                url = annexarchive.get_file_url(
                    archive_key=key,
                    file=extracted_file,
                    size=os.stat(extracted_path).st_size)

                # lgr.debug("mv {extracted_path} {target_file}. URL: {url}".format(**locals()))

                target_file_path = opj(extract_rpath, target_file) \
                    if extract_rpath else target_file

                target_file_path = opj(annex.path, target_file_path)

                if lexists(target_file_path):
                    handle_existing = True
                    if md5sum(target_file_path) == md5sum(extracted_path):
                        if not annex.is_under_annex(extracted_path):
                            # if under annex -- must be having the same content,
                            # we should just add possibly a new extra URL
                            # but if under git -- we cannot/should not do
                            # anything about it ATM
                            if existing != 'overwrite':
                                continue
                        else:
                            handle_existing = False
                    if not handle_existing:
                        pass  # nothing... just to avoid additional indentation
                    elif existing == 'fail':
                        raise RuntimeError(
                            "File {} already exists, but new (?) file {} was instructed "
                            "to be placed there while overwrite=False".format(
                                target_file_path, extracted_file))
                    elif existing == 'overwrite':
                        stats.overwritten += 1
                        # to make sure it doesn't conflict -- might have been a tree
                        rmtree(target_file_path)
                    else:
                        target_file_path_orig_ = target_file_path

                        # To keep extension intact -- operate on the base of the filename
                        p, fn = os.path.split(target_file_path)
                        ends_with_dot = fn.endswith('.')
                        fn_base, fn_ext = file_basename(fn, return_ext=True)

                        if existing == 'archive-suffix':
                            fn_base += '-%s' % archive_basename
                        elif existing == 'numeric-suffix':
                            pass  # archive-suffix will have the same logic
                        else:
                            raise ValueError(existing)
                        # keep incrementing index in the suffix until file doesn't collide
                        suf, i = '', 0
                        while True:
                            target_file_path_new = opj(
                                p, fn_base + suf +
                                ('.' if
                                 (fn_ext or ends_with_dot) else '') + fn_ext)
                            if not lexists(target_file_path_new):
                                break
                            lgr.debug("File %s already exists" %
                                      target_file_path_new)
                            i += 1
                            suf = '.%d' % i
                        target_file_path = target_file_path_new
                        lgr.debug("Original file %s will be saved into %s" %
                                  (target_file_path_orig_, target_file_path))
                        # TODO: should we reserve smth like
                        # stats.clobbed += 1

                if target_file_path != target_file_path_orig:
                    stats.renamed += 1

                #target_path = opj(getpwd(), target_file)
                if copy:
                    raise NotImplementedError(
                        "Not yet copying from 'persistent' cache")
                else:
                    # os.renames(extracted_path, target_path)
                    # addurl implementation relying on annex'es addurl below would actually copy
                    pass

                lgr.debug(
                    "Adding %s to annex pointing to %s and with options %r",
                    target_file_path, url, annex_options)

                out_json = annex.add_url_to_file(target_file_path,
                                                 url,
                                                 options=annex_options,
                                                 batch=True)

                if 'key' in out_json and out_json[
                        'key'] is not None:  # annex.is_under_annex(target_file, batch=True):
                    # due to http://git-annex.branchable.com/bugs/annex_drop_is_not___34__in_effect__34___for_load_which_was___34__addurl_--batch__34__ed_but_not_yet_committed/?updated
                    # we need to maintain a list of those to be dropped files
                    if drop_after:
                        annex.drop_key(out_json['key'], batch=True)
                        stats.dropped += 1
                    stats.add_annex += 1
                else:
                    lgr.debug(
                        "File {} was added to git, not adding url".format(
                            target_file_path))
                    stats.add_git += 1

                if delete_after:
                    # delayed removal so it doesn't interfere with batched processes, since any
                    # pure git action invokes precommit which closes batched processes. But we
                    # still want to count the removal
                    stats.removed += 1

                # # chaining 3 annex commands, 2 of which not batched -- less efficient but more bullet proof etc
                # annex.add(target_path, options=annex_options)
                # # above action might add to git or to annex
                # if annex.file_has_content(target_path):
                #     # if not --  it was added to git, if in annex, it is present and output is True
                #     annex.add_url_to_file(target_file, url, options=['--relaxed'], batch=True)
                #     stats.add_annex += 1
                # else:
                #     lgr.debug("File {} was added to git, not adding url".format(target_file))
                #     stats.add_git += 1
                # # TODO: actually check if it is anyhow different from a previous version. If not
                # # then it wasn't really added

                del target_file  # Done with target_file -- just to have clear end of the loop

            if delete and archive and origin != 'key':
                lgr.debug("Removing the original archive {}".format(archive))
                # force=True since some times might still be staged and fail
                annex.remove(archive_rpath, force=True)

            lgr.info("Finished adding %s: %s" %
                     (archive, stats.as_str(mode='line')))

            if outside_stats:
                outside_stats += stats
            if delete_after:
                # force since not committed. r=True for -r (passed into git call
                # to recurse)
                delete_after_rpath = opj(
                    extract_rpath, prefix_dir) if extract_rpath else prefix_dir
                lgr.debug("Removing extracted and annexed files under %s",
                          delete_after_rpath)
                annex.remove(delete_after_rpath, r=True, force=True)
            if commit:
                commit_stats = outside_stats if outside_stats else stats
                annex.precommit(
                )  # so batched ones close and files become annex symlinks etc
                precommitted = True
                if annex.is_dirty(untracked_files=False):
                    annex.commit(
                        "Added content extracted from %s %s\n\n%s" %
                        (origin, archive, commit_stats.as_str(mode='full')),
                        _datalad_msg=True)
                    commit_stats.reset()
        finally:
            # since we batched addurl, we should close those batched processes
            # if haven't done yet.  explicitly checked to avoid any possible
            # "double-action"
            if not precommitted:
                annex.precommit()

            if delete_after_rpath:
                delete_after_path = opj(annex_path, delete_after_rpath)
                if exists(delete_after_path):  # should not be there
                    # but for paranoid yoh
                    lgr.warning(
                        "Removing temporary directory under which extracted "
                        "files were annexed and should have been removed: %s",
                        delete_after_path)
                    rmtree(delete_after_path)

            annex.always_commit = old_always_commit
            # remove what is left and/or everything upon failure
            earchive.clean(force=True)

        return annex
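
The collision handling above (keep appending a numeric suffix before the extension until the name is free) is useful on its own; a self-contained sketch of just that loop, without the archive-suffix variant:

import os.path as op

def uncollide(path):
    """Return `path`, or `path` with a numeric suffix inserted before the
    extension if something (including a dangling symlink) already sits there."""
    if not op.lexists(path):
        return path
    base, ext = op.splitext(path)
    i = 1
    while True:
        candidate = '%s.%d%s' % (base, i, ext)
        if not op.lexists(candidate):
            return candidate
        i += 1

# e.g. uncollide('archive/file.dat') -> 'archive/file.1.dat' if the name is taken
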
Example #58
    def tearDown(self):
        rm_rf(self.prefix)
        assert not lexists(self.prefix)
Example #59
    def makeLinks(self):
        full = self.options.get("full")
        short = self.options.get("short")
        addfold = self.options.get("addfold")
        fold = self.options.get("fold")
        useServiceNameLinks = self.options.get("useServiceNameLinks")
        useHardLinks = self.options.get("useHardLinks")

        piconLinks = {}
        linksMade = 0
        commentRe = re.compile('#.*')

        for line in self.servrefFile:
            line = commentRe.sub('', line).rstrip()
            if not line:
                continue
            F = line.split()
            if len(F) > 3:
                print >> stderr, "Too many fields in server reference file:", line
                continue
            if len(F) < 3:
                print >> stderr, "Too few fields in server reference file:", line
                continue
            servRef, serviceName, picon = F
            servRefName = servRef
            servRefParts = servRefName.split(':')[0:10]
            servRefs = []
            if useServiceNameLinks:
                servRefs.append([serviceName])
            if full or addfold:
                servRefs.append(servRefParts)
            if short:
                servRefs.append(servRefParts[0:1] + servRefParts[3:7])
            if addfold and (int(servRefParts[0]) & ~0x0100) == 1:
                stype = int(servRefParts[2], 16)
                if stype not in (0x1, 0x2, 0xA):
                    servRefPartsFold = servRefParts[:]
                    servRefPartsFold[2] = "1"
                    servRefs.append(servRefPartsFold)
                # Fake up servref types 0x2 & 0xA for ABC News Radio
                if stype in (0x2, 0xA) and int(servRefParts[5], 16) in (
                        0x1010,
                        0x3201) and int(servRefParts[3], 16) & 0xF == 0xF:
                    servRefPartsFold = servRefParts[:]
                    servRefPartsFold[2] = "2" if stype == 0xA else "A"
                    servRefs.append(servRefPartsFold)
                # Fake up servref types 0x2 & 0xA for ABC News Radio
            if fold and (int(servRefParts[0]) & ~0x0100) == 1:
                stype = int(servRefParts[2], 16)
                if stype not in (0x1, 0x2, 0xA):
                    servRefPartsFold = servRefParts[:]
                    servRefPartsFold[2] = "1"
                servRefs.append(servRefPartsFold)

            for srp in servRefs:
                servRefName = '_'.join(srp) + '.png'

                if piconLinks.get(servRefName) == picon:
                    continue

                if servRefName not in piconLinks:
                    linked = False
                    servRefPath = path.join(self.piconPath, servRefName)

                    exists = path.exists(servRefPath)

                    alreadyOverridden = servRefPath in self.overrides
                    if exists and self.isOverride(servRefPath):
                        if not alreadyOverridden:
                            print >> stderr, "Picon", picon, "over-ridden by specific servref icon", servRefName
                        continue

                    lexists = exists or path.lexists(servRefPath)

                    if (not exists or lexists) and picon in self.piconFiles:
                        piconName, piconRef = self.piconFiles[picon]
                        piconPath = path.join(self.CHAN_PICON_DIR, piconName)
                        if useHardLinks:
                            piconPath = path.join(self.piconPath, piconPath)

                        if servRefName in self.origPiconLinks:
                            if self.origPiconLinks[servRefName] == piconRef:
                                linked = True
                            del self.origPiconLinks[servRefName]

                        if not linked:
                            try:
                                if lexists:
                                    remove(servRefPath)

                                linksMade += 1
                                if useHardLinks:
                                    link(piconPath, servRefPath)
                                else:
                                    symlink(piconPath, servRefPath)
                                linked = True
                            except Exception as err:
                                print >> stderr, (
                                    "Link" if useHardLinks else "Symlink"
                                ), piconName, "->", servRefName, "failed -", str(
                                    err)

                    if linked:
                        self.linkedPiconNames.add(piconName)
                        piconLinks[servRefName] = picon
                    else:
                        if picon not in ("tba", "tobeadvised"):
                            print >> stderr, "No picon", picon, "for", servRef
                else:
                    print >> stderr, "Servref link", servRef, "->", piconLinks[
                        servRefName], "exists; new link requested for", picon
        self.servrefFile.close()
        print >> stderr, "linksMade:", linksMade
Example #60
    def __call__(
            path=None,
            *,
            dataset=None,
            drop='datasets',
            reckless=None,
            message=None,
            jobs=None,
            # deprecated below
            recursive=None,
            check=None,
            save=None,
            if_dirty=None):

        # deprecate checks
        if if_dirty is not None:
            warnings.warn(
                "The `if_dirty` argument of `datalad remove` is ignored, "
                "it can be removed for a safe-by-default behavior. For "
                "other cases consider the `reckless` argument.",
                DeprecationWarning)

        if save is not None:
            warnings.warn(
                "The `save` argument of `datalad remove` is ignored. "
                "A dataset modification is always saved. Consider "
                "`save --amend` if post-remove fix-ups are needed.",
                DeprecationWarning)

        if recursive is not None:
            warnings.warn(
                "The `recursive` argument of `datalad remove` is ignored. "
                "Removal operations are always recursive, and the parameter "
                "can be stripped from calls for a safe-by-default behavior. ",
                DeprecationWarning)

        if check is not None:
            warnings.warn(
                "The `check` argument of `datalad remove` is deprecated, "
                "use the `reckless` argument instead.", DeprecationWarning)

        if check is False:
            if reckless is not None:
                raise ValueError(
                    'Must not use deprecated `check` argument, and new '
                    '`reckless` argument together with `datalad remove`.')
            reckless = 'availability'

        refds = require_dataset(dataset,
                                check_installed=True,
                                purpose='remove')
        # same path resolution that drop will do
        paths_by_ds, errors = get_paths_by_ds(
            refds,
            dataset,
            ensure_list(path),
            # super-mode will readily tell us which datasets to
            # save at the end
            subdsroot_mode='super')

        drop_success = True
        for res in Drop.__call__(
                dataset=dataset,
                path=path,
                what=drop,
                reckless=reckless,
                recursive=True,
                recursion_limit=None,
                jobs=jobs,
                result_xfm=None,
                return_type='generator',
                result_renderer='disabled',
                # delegate error handling here
                on_failure='ignore'):
            if res.get('status') not in ('ok', 'notneeded'):
                drop_success = False
            yield res

        if not drop_success:
            # there will be 'rm -rf' below, so play safe
            lgr.debug('Observed drop failure, will not attempt remove')
            return

        for dpath, paths in paths_by_ds.items():
            for delpath in ([dpath] if paths is None else paths):
                if lexists(str(delpath)):
                    # here we still have something around on the
                    # filesystem. There is no need to fiddle with
                    # Git, just wipe it out. A later save() will
                    # act on it properly
                    if delpath.is_dir():
                        lgr.debug('Remove directory: %s', delpath)
                        rmtree(delpath)
                    # cannot use .exists(); must foresee dead symlinks
                    else:
                        lgr.debug('Remove file: %s', delpath)
                        delpath.unlink()
                    continue
                # if we get here, there is nothing on the file system
                # anymore at this path. Either because the parent
                # dataset vanished already, or because we dropped a
                # dataset, and it still needs to be unregistered
                # from its parent -> `git rm`
                if dpath.exists():
                    GitRepo(dpath).call_git(
                        # no need for recursion, we know that even the root
                        # path no longer exists
                        ['rm', '-q'],
                        files=[str(delpath.relative_to(dpath))])
                    # this path was already being removed by drop
                    # so it must belong to a dropped dataset
                    # save won't report about this, let's do it
                    yield dict(
                        action='remove',
                        status='ok',
                        path=str(delpath),
                        type='dataset',
                    )

        if not refds.is_installed():
            # we already dropped the whole thing
            return

        for res in Save.__call__(
                dataset=dataset,
                path=path,
                # we might have removed the reference dataset by now, recheck
                message=message if message else '[DATALAD] removed content',
                return_type='generator',
                result_renderer='disabled',
                result_xfm=None,
                result_filter=None,
                on_failure='ignore'):
            if res.get('action') == 'delete':
                # normalize to previous remove results
                res['action'] = 'remove'
            yield res
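
The filesystem wipe-out branch above has to treat directories, regular files, and dead symlinks uniformly; a self-contained sketch of that logic, with `shutil.rmtree` standing in for datalad's `rmtree` wrapper:

import os
import shutil
from pathlib import Path

def wipe(delpath: Path):
    """Remove whatever is at `delpath`: a directory tree, a file, or a dangling symlink."""
    if not os.path.lexists(delpath):
        return
    if delpath.is_dir() and not delpath.is_symlink():
        shutil.rmtree(delpath)
    else:
        # unlink() also handles symlinks whose target no longer exists
        delpath.unlink()

wipe(Path('some/leftover/path'))  # no-op if nothing is there
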