Exemple #1
0
def test_read_text():
    """Write TEXT to a scratch file and check fileio.read_text returns it.

    Exercises both the default call and utf8_strict=True.  The scratch file
    is removed even when a read or an assertion fails, so a failing run does
    not leave read_text.json behind in TESTING_BASE_DIR.
    """
    # TODO test bad chars
    path = os.path.join(TESTING_BASE_DIR, 'read_text.json')
    with open(path, 'w') as f:
        f.write(TEXT)
    try:
        # regular
        data = fileio.read_text(path)
        assert data == TEXT
        # utf8_strict
        data = fileio.read_text(path, utf8_strict=True)
        assert data == TEXT
    finally:
        # clean up regardless of the outcome above
        os.remove(path)
Exemple #2
0
def from_json(model, json_path, identifier, inherit=True):
    """Instantiate `model` and populate it from the given JSON file.
    
    The object is constructed from a path derived from json_path: for
    identifiers whose model is 'file' the .json extension is stripped,
    otherwise the directory containing the JSON file is used.
    
    @param model: LocalCollection, LocalEntity, or File
    @param json_path: absolute path to the object's .json file
    @param identifier: Identifier (its .model attribute is read here)
    @param inherit: boolean Disable in loops to avoid infinite recursion
        (not referenced inside this function body)
    @returns: object
    @raises Exception: if model or json_path is falsy
    """
    if not model:
        raise Exception('Cannot instantiate from JSON without a model object.')
    if not json_path:
        raise Exception('Bad path: %s' % json_path)
    if identifier.model in ['file']:
        # object_id is in .json file
        object_path = os.path.splitext(json_path)[0]
    else:
        # object_id is in object directory
        object_path = os.path.dirname(json_path)
    document = model(object_path, identifier=identifier)
    # remember the id assigned at construction; load_json may blank it
    constructed_id = document.id
    json_text = fileio.read_text(json_path)
    document.load_json(json_text)
    if not document.id:
        # id gets overwritten if document.json is blank
        document.id = constructed_id
    return document
Exemple #3
0
 def checksums(self, algo, force_read=False):
     """Calculates hash checksums for the Entity's files.
     
     Gets hashes from FILE.json metadata if the file(s) are absent
     from the filesystem (i.e. git-annex file symlinks).
     Overrides DDR.models.Entity.checksums.
     
     For each path from self._file_paths() the metadata JSON is read from
     self.files_path; the checksum is the value of the field named `algo`
     and the file extension is taken from the 'basename_orig' field.
     Files for which no checksum could be determined are omitted.
     
     @param algo: str One of self.checksum_algorithms()
     @param force_read: bool Traverse filesystem if true.
     @returns: list of (checksum, file basename) tuples
     @raises Error: if algo is not a supported algorithm
     """
     if algo not in self.checksum_algorithms():
         raise Error('BAD ALGORITHM CHOICE: {}'.format(algo))
     checksums = []
     for f in self._file_paths():
         cs = None
         # Default to an empty extension so that metadata lacking a
         # 'basename_orig' field does not make `pathname + ext` raise
         # TypeError (None cannot be concatenated to a str).
         ext = ''
         pathname = os.path.splitext(f)[0]
         # from metadata file
         json_path = os.path.join(self.files_path, f)
         for field in json.loads(fileio.read_text(json_path)):
             # .items() exists on both Python 2 and 3; .iteritems() does not
             for k, v in field.items():
                 if k == algo:
                     cs = v
                 if k == 'basename_orig':
                     ext = os.path.splitext(v)[-1]
         fpath = pathname + ext
         # from filesystem, only when asked and the (git-annex) file is present
         if force_read and os.path.exists(fpath):
             cs = util.file_hash(fpath, algo)
         if cs:
             checksums.append((cs, os.path.basename(fpath)))
     return checksums
Exemple #4
0
def read_id_file(path):
    """Read a text file and return its lines as a list of IDs.
    
    Leading/trailing whitespace is stripped from the whole text and then
    from each individual line.
    
    @param path: str Absolute path to file.
    @returns: list of IDs (one per line of the file)
    """
    ids = []
    for line in fileio.read_text(path).strip().split('\n'):
        ids.append(line.strip())
    return ids
Exemple #5
0
 def dump_xml(self):
     """Render this object through the METS template and return the result.
     
     The template text is read from config.TEMPLATE_METS_JINJA2 and rendered
     with this object available to the template as `object`.  Nothing is
     written to disk here; the rendered text is returned to the caller.
     
     @returns: rendered template output
     """
     template_text = fileio.read_text(config.TEMPLATE_METS_JINJA2)
     template = Template(template_text)
     return template.render(object=self)
Exemple #6
0
def merge_add( repo, file_path_rel ):
    """Stage a file with git unless it still contains merge conflict markers.
    
    @param repo: repository object providing .working_dir and .git.add
    @param file_path_rel: path of the file relative to repo.working_dir
    @returns: 'ok' if the file was added, otherwise an 'ERROR: ...' string
    """
    abs_path = os.path.join(repo.working_dir, file_path_rel)
    contents = fileio.read_text(abs_path)
    # any one of the three markers means the conflict is unresolved
    markers = (MERGE_MARKER_START, MERGE_MARKER_MID, MERGE_MARKER_END)
    if any(marker in contents for marker in markers):
        return 'ERROR: file still contains merge conflict markers'
    repo.git.add(file_path_rel)
    return 'ok'
Exemple #7
0
def load_json_lite(json_path, model, object_id):
    """Parse a JSON file; for the 'file' model, append an {'id': object_id} entry.
    
    @param json_path: str
    @param model: str
    @param object_id: str
    @returns: list of dicts
    """
    raw_text = fileio.read_text(json_path)
    document = json.loads(raw_text)
    if model != 'file':
        return document
    document.append({'id': object_id})
    return document
Exemple #8
0
def postjson(hosts, index, doctype, object_id, path):
    """Post raw JSON file to Elasticsearch (YMMV)
    
    This command is for posting raw JSON files.  If the file you wish to post
    is a DDR object, please use "ddrindex post".
    
    The file at `path` is read as text and handed to Docstore.post_json;
    whatever that call returns is echoed to the console.
    """
    ds = docstore.Docstore(hosts, index)
    json_text = fileio.read_text(path)
    status = ds.post_json(doctype, object_id, json_text)
    click.echo(status)
Exemple #9
0
 def _read_fields(self, path):
     """Extracts specified fields from JSON
     
     The file is expected to contain a list of dicts.  Only the first key
     of each dict is examined; it is kept when it appears in JSON_FIELDS.
     Truthy values are coerced to int when the corresponding JSON_FIELDS
     entry is itself an int.  Empty dicts are skipped.
     
     @param path: path to the JSON file
     @returns: dict mapping selected field names to their values
     """
     data = {}
     for d in json.loads(fileio.read_text(path)):
         if not d:
             # nothing to extract; avoids failing on an empty record
             continue
         # dict.keys() is not indexable on Python 3; take the first key
         # via iteration, which works on both Python 2 and 3
         key = next(iter(d))
         if key in JSON_FIELDS:
             # coerces to int
             if d.get(key) and isinstance(JSON_FIELDS[key], int):
                 data[key] = int(d[key])
             else:
                 data[key] = d[key]
     return data
Exemple #10
0
 def children(self, quick=False):
     """Returns list of the Collection's Entity objects.
     
     >>> c = Collection.from_json('/tmp/ddr-testing-123')
     >>> c.children()
     [<Entity ddr-testing-123-1>, <Entity ddr-testing-123-2>, ...]
     
     Every entry of self.files_path is treated as a child path; the paths
     are ordered with util.natural_sort.  In quick mode, paths without an
     entity.json are left out of the result.
     
     TODO use util.find_meta_files()
     
     @param quick: Boolean List only titles and IDs (ListEntity objects
         filled in by scanning entity.json line by line)
     @returns: list of Entities or ListEntity
     """
     child_paths = []
     if os.path.exists(self.files_path):
         # TODO use cached list if available
         child_paths = [
             os.path.join(self.files_path, eid)
             for eid in os.listdir(self.files_path)
         ]
     child_paths = util.natural_sort(child_paths)
     entities = []
     for path in child_paths:
         if not quick:
             # full object, with title copied from its labels/values
             entity = Entity.from_identifier(Identifier(path=path))
             for lv in entity.labels_values():
                 if lv['label'] == 'title':
                     entity.title = lv['value']
             entities.append(entity)
             continue
         # fake Entity with just enough info for lists
         entity_json_path = os.path.join(path, 'entity.json')
         if not os.path.exists(entity_json_path):
             continue
         e = ListEntity()
         e.identifier = Identifier(path=path)
         e.id = e.identifier.id
         for line in fileio.read_text(entity_json_path).split('\n'):
             if '"title":' in line:
                 # wrap the single '"key": value' line so it parses as JSON
                 e.title = json.loads('{%s}' % line)['title']
             elif '"signature_id":' in line:
                 e.signature_id = json.loads('{%s}' % line)['signature_id']
                 e.signature_abs = common.signature_abs(
                     e, self.identifier.basepath)
             if e.title and e.signature_id:
                 # stop once we have what we need so we don't waste time
                 # and have entity.children as separate ghost entities
                 break
         entities.append(e)
     return entities
def analyze_files(paths, verbose=False):
    """Opens files with strict encoding; lists paths that throw exceptions
    
    Each path is read with utf8_strict=True.  If that raises, the path is
    recorded as defective, the file is re-read without strict mode and
    chardet is asked to guess its encoding.  In verbose mode a '.' is
    written for every clean file and a line with the path and the guess is
    printed for every defective one.
    
    @param paths: list
    @param verbose: boolean
    @returns: list of defective paths
    """
    defects = []
    for path in paths:
        try:
            fileio.read_text(path, utf8_strict=True)
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not swallowed and
            # misreported as file defects.
            defects.append(path)
            text = fileio.read_text(path)
            guess = chardet.detect(text)
            if verbose:
                print('\n| {} {}'.format(path, guess))
        else:
            if verbose:
                sys.stdout.write('.')
    if len(paths) and verbose:
        print('')
    return defects
Exemple #12
0
 def _load_vocab_files(vocabs_path):
     """Read the text of every .json file directly inside vocabs_path.
     
     @param vocabs_path: Absolute path to dir containing vocab .json files.
     @returns: list of raw text contents of files.
     """
     candidates = [
         os.path.join(vocabs_path, name)
         for name in os.listdir(vocabs_path)
     ]
     # keep only entries whose extension is exactly '.json'
     json_paths = [
         candidate for candidate in candidates
         if os.path.splitext(candidate)[1] == '.json'
     ]
     json_texts = []
     for json_path in json_paths:
         json_texts.append(fileio.read_text(json_path))
     return json_texts
Exemple #13
0
def model_def_fields(document):
    """
    Wrapper around DDR.models.model_def_fields
    
    Compares the document's JSON text against its fields module via
    Module.cmp_model_definition_fields and, when fields were added or
    removed, records the lists and matching WEBUI_MESSAGES text as
    attributes on the document.  Returns nothing.
    """
    module = document.identifier.fields_module()
    json_text = fileio.read_text(document.json_path)
    result = modules.Module(module).cmp_model_definition_fields(json_text)
    added = result['added']
    removed = result['removed']
    # 'File.path_rel' is created when instantiating Files,
    # is not part of model definitions.
    for fields in (added, removed):
        if 'path_rel' in fields:
            fields.remove('path_rel')
    if added:
        document.model_def_fields_added = added
        document.model_def_fields_added_msg = WEBUI_MESSAGES['MODEL_DEF_FIELDS_ADDED'] % added
    if removed:
        document.model_def_fields_removed = removed
        document.model_def_fields_removed_msg = WEBUI_MESSAGES['MODEL_DEF_FIELDS_REMOVED'] % removed
Exemple #14
0
 def links_incoming(self):
     """List of path_rels of files that link to this file.
     
     Runs `find` over self.entity_files_path for *.json files, parses each
     one as a list of dicts, and appends that file's 'path_rel' value once
     for every ';'-separated entry of a 'links' field that contains
     self.basename.
     
     @returns: list of path_rel values (None where a file had no path_rel)
     """
     cmd = 'find {} -name "*.json" -print'.format(self.entity_files_path)
     result = envoy.run(cmd)
     filenames = result.std_out.strip().split('\n') if result.std_out else []
     incoming = []
     for filename in filenames:
         fields = json.loads(fileio.read_text(filename))
         # first pass: find this file's own path_rel (last truthy one wins)
         path_rel = None
         for field in fields:
             if field.get('path_rel', None):
                 path_rel = field['path_rel']
         # second pass: look for links that mention this file
         for field in fields:
             linksraw = field.get('links', None)
             if not linksraw:
                 continue
             for link in linksraw.strip().split(';'):
                 if self.basename in link.strip():
                     incoming.append(path_rel)
     return incoming
Exemple #15
0
 def links_incoming( self ):
     """List of path_rels of files that link to this file.
     
     Every *.json file that `find` reports under self.entity_files_path is
     parsed as a list of dicts.  The file's 'path_rel' value is appended to
     the result once per ';'-separated 'links' entry containing
     self.basename.
     
     @returns: list of path_rel values (None where a file had no path_rel)
     """
     find_cmd = 'find {} -name "*.json" -print'.format(self.entity_files_path)
     response = envoy.run(find_cmd)
     json_files = []
     if response.std_out:
         json_files = response.std_out.strip().split('\n')
     incoming = []
     for json_file in json_files:
         records = json.loads(fileio.read_text(json_file))
         # determine the file's own path_rel before scanning its links
         path_rel = None
         for record in records:
             if record.get('path_rel',None):
                 path_rel = record['path_rel']
         for record in records:
             linksraw = record.get('links', None)
             if not linksraw:
                 continue
             candidates = [part.strip() for part in linksraw.strip().split(';')]
             for candidate in candidates:
                 if self.basename in candidate:
                     incoming.append(path_rel)
     return incoming
Exemple #16
0
def sort_file_paths(json_paths, rank='role-eid-sort'):
    """Sort file JSON paths in human-friendly order.
    
    TODO this belongs in DDR.identifier
    
    A sort key is built per path from the Identifier parts (eid, role,
    sha1) plus a 'sort' value scraped from the file's text: the last line
    containing 'sort' supplies it, defaulting to 0.  Keys are ordered with
    util.natural_sort and paths are emitted by taking keys from the END of
    that ordering.
    
    NOTE: json_paths is consumed; the caller's list is empty afterwards.
    
    @param json_paths: list of paths to file JSONs (emptied by this call)
    @param rank: 'role-eid-sort' or 'eid-sort-role'
    @returns: list of paths
    """
    path_by_key = {}
    keys = []
    while json_paths:
        path = json_paths.pop()
        parts = Identifier(path=path).parts
        eid = str(parts.get('eid',None))
        role = parts.get('role',None)
        sha1 = str(parts.get('sha1',None))
        sort = 0
        for line in fileio.read_text(path).splitlines():
            if 'sort' in line:
                sort = line.split(':')[1].replace('"','').strip()
        sort = str(sort)
        if rank == 'eid-sort-role':
            key = '-'.join([eid, sort, role, sha1])
        elif rank == 'role-eid-sort':
            key = '-'.join([role, eid, sort, sha1])
        path_by_key[key] = path
        keys.append(key)
    paths_sorted = []
    for sorted_key in reversed(list(util.natural_sort(keys))):
        # duplicate keys yield None the second time and are skipped
        val = path_by_key.pop(sorted_key, None)
        if val:
            paths_sorted.append(val)
    return paths_sorted
Exemple #17
0
def sort_file_paths(json_paths, rank='role-eid-sort'):
    """Sort file JSON paths in human-friendly order.
    
    TODO this belongs in DDR.identifier
    
    Each path gets a '-'-joined key made of its Identifier parts (eid,
    role, sha1) and a 'sort' value taken from the last line of the file
    that contains 'sort' (0 if there is none).  The keys are ordered by
    util.natural_sort, then consumed from the end of that list to build
    the result.
    
    NOTE: the json_paths list passed in is emptied as it is processed.
    
    @param json_paths: list of paths to file JSONs (emptied by this call)
    @param rank: 'role-eid-sort' or 'eid-sort-role'
    @returns: list of paths
    """
    lookup = {}
    all_keys = []
    while json_paths:
        json_path = json_paths.pop()
        id_parts = Identifier(path=json_path).parts
        role = id_parts.get('role',None)
        eid = str(id_parts.get('eid',None))
        sha1 = str(id_parts.get('sha1',None))
        sort = 0
        for text_line in fileio.read_text(json_path).splitlines():
            if 'sort' not in text_line:
                continue
            sort = text_line.split(':')[1].replace('"','').strip()
        sort = str(sort)
        if rank == 'eid-sort-role':
            ordered_parts = [eid, sort, role, sha1]
            key = '-'.join(ordered_parts)
        elif rank == 'role-eid-sort':
            ordered_parts = [role, eid, sort, sha1]
            key = '-'.join(ordered_parts)
        lookup[key] = json_path
        all_keys.append(key)
    remaining = list(util.natural_sort(all_keys))
    paths_sorted = []
    while remaining:
        # pop() takes from the end of the naturally-sorted keys
        found = lookup.pop(remaining.pop(), None)
        if found:
            paths_sorted.append(found)
    return paths_sorted
Exemple #18
0
 def log(self):
     """Return the text of the file at self.logpath, or '' if it is absent."""
     if not os.path.exists(self.logpath):
         return ''
     return fileio.read_text(self.logpath)
Exemple #19
0
def load_template(filename):
    """Return the text of the template file at `filename`.
    
    @param filename: path to the template file
    @returns: whatever fileio.read_text returns for that path
    """
    template_text = fileio.read_text(filename)
    return template_text
Exemple #20
0
 def log(self):
     """Read self.logpath if it exists; otherwise return an empty string."""
     return fileio.read_text(self.logpath) if os.path.exists(self.logpath) else ''
Exemple #21
0
def read_changelog(path: str) -> List[Dict[str, object]]:
    """Read a changelog file and parse its text with read_entries.
    
    @param path: Absolute path to changelog file.
    @returns list of entry dicts
    """
    changelog_text = fileio.read_text(path)
    return read_entries(changelog_text)
Exemple #22
0
 def gitignore(self):
     """Return the text of the file at self.gitignore_path.
     
     If that file does not exist yet it is first created from the contents
     of GITIGNORE_TEMPLATE.
     """
     if not os.path.exists(self.gitignore_path):
         template_text = fileio.read_text(GITIGNORE_TEMPLATE)
         fileio.write_text(template_text, self.gitignore_path)
     return fileio.read_text(self.gitignore_path)
Exemple #23
0
def load_json(path):
    """Read the file at `path` and parse it as JSON.
    
    @param path: path to a JSON file
    @returns: the parsed data
    @raises Exception: if the text cannot be decoded as JSON; the message
        names the offending path
    """
    try:
        return json.loads(fileio.read_text(path))
    except json.JSONDecodeError:
        raise Exception('json.errors.JSONDecodeError reading %s' % path)
Exemple #24
0
def load_template(filename: str) -> str:
    """Return the text of the template file at `filename`."""
    template_text = fileio.read_text(filename)
    return template_text
Exemple #25
0
def load_json(path):
    """Read the file at `path` and parse it as JSON.
    
    NOTE(review): the except clause references json.errors, which the
    standard-library json module does not provide; presumably `json` is
    bound to simplejson in this module -- confirm against the imports.
    
    @param path: path to a JSON file
    @returns: the parsed data
    @raises Exception: if the text cannot be decoded as JSON; the message
        names the offending path
    """
    try:
        return json.loads(fileio.read_text(path))
    except json.errors.JSONDecodeError:
        raise Exception('simplejson.errors.JSONDecodeError reading %s' % path)