Ejemplo n.º 1
0
def index_traces(updates_directory, index_filename):
    index = UpdatesIndex(index_filename)
    tarnames_processed = index.tarnames
    for tarname in glob(join(updates_directory, '*.tar')):
        if basename(tarname) not in tarnames_processed:
            try:
                true_sum = splitext(basename(tarname))[0].split('_')[3]
            except IndexError:
                true_sum = None
            if true_sum is not None:
                hasher = md5()
                hasher.update(open(tarname, 'r').read())
                if hasher.hexdigest() != true_sum:
                    print 'skipping', tarname, '(invalid hash)'
                    continue

            tarball = tarfile.open(tarname, 'r')
            for tarmember in tarball.getmembers():
                tarhandle = tarball.extractfile(tarmember.name)
                update_content = GzipFile(fileobj=tarhandle).read()
                update = PassiveUpdate(update_content, onlyheaders=True)
                if update.anonymized:
                    signature = update.anonymization_signature
                else:
                    signature = 'unanonymized'
                index.index(basename(tarname), tarmember.name,
                            update.bismark_id, signature, update.creation_time,
                            update.sequence_number, len(update_content))
    index.finalize_indexing()
def index_traces(updates_directory, index_filename):
    index = UpdatesIndex(index_filename)
    tarnames_processed = index.tarnames
    for tarname in glob(join(updates_directory, '*.tar')):
        if basename(tarname) not in tarnames_processed:
            try:
                true_sum = splitext(basename(tarname))[0].split('_')[3]
            except IndexError:
                true_sum = None
            if true_sum is not None:
                hasher = md5()
                hasher.update(open(tarname, 'r').read())
                if hasher.hexdigest() != true_sum:
                    print 'skipping', tarname, '(invalid hash)'
                    continue

            tarball = tarfile.open(tarname, 'r')
            for tarmember in tarball.getmembers():
                tarhandle = tarball.extractfile(tarmember.name)
                update_content = GzipFile(fileobj=tarhandle).read()
                update = PassiveUpdate(update_content, onlyheaders=True)
                if update.anonymized:
                    signature = update.anonymization_signature
                else:
                    signature = 'unanonymized'
                index.index(basename(tarname),
                            tarmember.name,
                            update.bismark_id,
                            signature,
                            update.creation_time,
                            update.sequence_number,
                            len(update_content))
    index.finalize_indexing()