Example #1
0
 def _check_update(self, doc_name, definition):
     """Store the current hash of a definition file on its document.

     ``definition`` is resolved to a path with ``definition_lookup`` and
     that path is hashed with ``md5sum``.  If ``doc_name`` does not exist
     yet, the definition is loaded via ``load_definition`` first.  In every
     case the document's ``('info', 'definition_hash')`` property ends up
     holding the freshly computed hash.

     NOTE(review): for an existing document whose stored hash differs,
     only the hash is rewritten -- the definition is not reloaded (the
     original had that call commented out).  Confirm this is intended.
     """
     from helpers import md5sum
     from helpers.document import load_definition, definition_lookup

     definition_path = definition_lookup(definition)
     current_hash = md5sum(definition_path)

     if self.has_document(doc_name):
         document = self.load_document(doc_name)
         # Only rewrite the property when the stored hash is out of date.
         needs_write = current_hash != document.property('info', 'definition_hash')
     else:
         # First sighting of this document: load it, then stamp the hash.
         load_definition(self, definition_path)
         document = self.load_document(doc_name)
         needs_write = True

     if needs_write:
         document.update_property('info', 'definition_hash', current_hash)
Example #2
0
    def __call__(self, *args, **kwargs):
        """Print shell commands that remove ``self.fn`` if its checksum is bad.

        The expected digest is read from the sibling file whose name is
        ``self.fn`` with its extension replaced by ``.md5``.  Nothing is
        deleted here: an ``rm`` line, preceded by a ``#`` comment giving the
        reason, is printed when the sum file is missing or when its stripped
        content differs from ``md5sum(self.fn)``.  Nothing is printed when
        the two digests are equal.

        The path in the ``rm`` line is shell-quoted so that a name containing
        spaces or shell metacharacters still yields a command that removes
        exactly that file.  Names made only of safe characters are printed
        unchanged.  Positional and keyword arguments are accepted but
        ignored.
        """
        # Local import: the file's top-level import block is not visible here.
        import shlex

        sum_file = os.path.splitext(self.fn)[0] + '.md5'
        rm_command = f'rm {shlex.quote(self.fn)}'

        if not os.path.exists(sum_file):
            print(f'# no sum file: {sum_file}')
            print(rm_command)
            return

        with open(sum_file, 'r') as f:
            md5_in_sum_file = f.read().strip()

        md5_in_bam_file = md5sum(self.fn)

        if md5_in_bam_file != md5_in_sum_file:
            print(f'# bam md5: {md5_in_bam_file}  sum file: {md5_in_sum_file}')
            print(rm_command)
Example #3
0
    def create_files_info(self,file_paths,file_sizes=None,
                               create_md5=False,rel_file_base=None):
        """ Build the list of per-file dicts for the info section of the
            meta data.

            Each entry holds 'length' (looked up in file_sizes by path),
            'path' (the name from get_file_name split on os.sep, blank
            components dropped) and, when create_md5 is set, 'md5sum'.

            When file_sizes is empty or missing it is obtained from
            determine_file_sizes(file_paths).  file_paths may also be a
            dict whose keys are the file paths and whose values are the
            file sizes. """

        sizes = file_sizes if file_sizes else determine_file_sizes(file_paths)

        def describe(file_path):
            # one info dict for a single file
            relative_name = get_file_name(file_path,rel_file_base)
            entry = {
                'length': sizes.get(file_path),
                'path': [part for part in relative_name.split(os.sep)
                         if part.strip()],
            }
            if create_md5:
                entry['md5sum'] = md5sum(file_path)
            return entry

        return [describe(file_path) for file_path in file_paths]
Example #4
0
    def create_info_dict(self,file_paths,pieces=None,file_sizes=None,
                         piece_size=None,total_size=None,
                         private=False,create_md5=False,file_name=None,
                         rel_file_base=None):
        """ creates a dict of the 'info' part of the meta data """
        # fill out our data
        if not file_sizes:
            file_sizes = determine_file_sizes(file_paths)
        if not total_size:
            total_size = sum(file_sizes.itervalues())
        if not piece_size:
            piece_size = determine_piece_size(total_size)

        # create our meta data dict
        info_data = {
            'piece length': piece_size,
            'pieces': ''.join(pieces),
            'private': 1 if private else 0,
        }

        # don't have to have a file name
        if file_name:
            info_data['name'] = file_name

        # we need to determine common prefix for all the files
        # it will be our rel base, any paths for the info will
        # be relative to it
        rel_file_base = os.path.commonprefix(file_paths)

        log.debug('rel file base: %s',rel_file_base)

        # length only appropriate if there is a single file
        if len(file_paths) == 1:
            info_data['length'] = total_size

            # if they want us to create the optional md5
            # for the files than lets do so
            if create_md5:
                info_data['md5sum'] = md5sum(file_paths[0])

            if not info_data.get('name'):
                # we'll go ahead and put a name
                info_data['name'] = get_file_name(file_paths[0],
                                                  rel_file_base)

        # if it's multiple files we give it each one individually
        else:
            info_data['files'] = self.create_files_info(file_paths,
                                                        file_sizes,
                                                        create_md5,
                                                        rel_file_base)

            if not info_data.get('name'):
                # guess a name
                name = get_common_name(file_paths)
                if name:
                    info_data['name'] = name

        # make sure our meta info is valid
        try:
            validate_info_data(info_data)
        except Exception, ex:
            raise