Example #1
def process_file(filename, folder=None):
    if not filename:
        raise ValueError("No filename given")

    def is_good_call(call):
        # A call is good if it has a nonzero duration, or if it did not
        # fail with status 487/402 and rang for more than 500 units.
        if call["duration"] and call["duration"] != "0":
            return True
        return bool(call["status"] not in ["487", "402"] and
                    int(call["num_call_ringtone"]) > 500)

    cur_file = None
    for tar, f_info in _extract_tar(filename):

        if cur_file != f_info.name:
            cur_file = f_info.name
            logger.info("Parsing: %s", cur_file)

        f = tar.extractfile(f_info)
        frames = []
        for line in _extract_data(f):
            _line = _extract_multiple_data(line)
            # The HH:MM timestamp is encoded in the last four digits of the
            # file name's stem, e.g. a stem ending in "1430" yields "14:30".
            stem = cur_file.split(".")[0]
            _line["time"] = "{}:{}".format(stem[-4:-2], stem[-2:])
            _line["failed"] = not is_good_call(_line)
            frames.append(_line)
        yield frames
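process_file is a generator that yields one list of parsed call records per archive member. A minimal consumption sketch, assuming the helper functions it relies on (_extract_tar, _extract_data, _extract_multiple_data) exist as above and "calls.tar.gz" is a placeholder archive:

for frames in process_file("calls.tar.gz"):
    failed = [call for call in frames if call["failed"]]
    print("parsed %d calls, %d failed" % (len(frames), len(failed)))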
Example #2
def load_chunk(tarfile, size=None):
  """Load a number of images from a single imagenet .tar file.

  This function also converts the image from grayscale to RGB if necessary.

  Args:
    tarfile (tarfile.TarFile): The archive from which the files get loaded.
    size (Optional[Tuple[int, int]]): Resize the image to this size if provided.

  Returns:
    numpy.ndarray: Contains the image data in format [batch, h, w, c]
  """
  result = []
  filenames = []
  for member in tarfile.getmembers():
    filename = member.path
    content = tarfile.extractfile(member)
    img = Image.open(content)
    rgbimg = Image.new("RGB", img.size)
    rgbimg.paste(img)
    if size is not None:
      # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
      rgbimg = rgbimg.resize(size, Image.LANCZOS)
    # np.array() on a PIL image yields (height, width, 3); add the batch
    # axis explicitly instead of reshaping with (width, height) swapped.
    result.append(np.array(rgbimg)[np.newaxis])
    filenames.append(filename)
  return np.concatenate(result), filenames
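A minimal usage sketch for load_chunk; the archive name is a placeholder, and the module is imported under an alias because the function's first parameter shadows the tarfile module name:

import tarfile as tf

with tf.open("imagenet_chunk.tar") as archive:  # placeholder archive
    batch, names = load_chunk(archive, size=(224, 224))
    print(batch.shape)  # e.g. (N, 224, 224, 3)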
Example #3
def hashcalc(tarfile, filename, member, hashtype, extract, data):
    hashtxt = 0

    if member.isdir():
        if extract == 1:
            if not os.path.exists(filename):
                os.makedirs(filename)
                os.chmod(filename, member.mode)
                os.chown(filename, member.uid, member.gid)

    if member.isfile():
        # Extract by member rather than by name to avoid a path mismatch.
        filecontents = tarfile.extractfile(member).read()
        # hashlib.new() covers md5/sha1/sha256/sha512 and raises ValueError
        # for an unknown hashtype instead of leaving the hash undefined.
        hashtxt = hashlib.new(hashtype, filecontents).hexdigest()
        data.append(hashtxt)
        if extract == 1:
            filenamechk = filename + "_" + hashtxt
            if not os.path.isfile(filenamechk):
                with open(filenamechk, 'wb') as fout:
                    fout.write(filecontents)
                os.chmod(filenamechk, member.mode)
                os.chown(filenamechk, member.uid, member.gid)
    else:
        data.append('0')
    return data
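hashcalc processes one member per call, so a driver loops over the archive; a hedged sketch with a placeholder archive name, hashing only (extract=0):

import tarfile as tf

hashes = []
with tf.open("backup.tar") as archive:  # placeholder archive
    for member in archive.getmembers():
        hashcalc(archive, member.name, member, 'sha256', 0, hashes)
print(hashes)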
Example #4
def load_chunk(tarfile, size=None):
    """Load a number of images from a single imagenet .tar file.

    This function also converts the image from grayscale to RGB if necessary.

    Args:
      tarfile (tarfile.TarFile): The archive from which the files get loaded.
      size (Optional[Tuple[int, int]]): Resize the image to this size if provided.

    Returns:
      numpy.ndarray: Contains the image data in format [batch, h, w, c]
    """
    result = []
    filenames = []
    for member in tarfile.getmembers():
        filename = member.path
        content = tarfile.extractfile(member)
        img = Image.open(content)
        rgbimg = Image.new("RGB", img.size)
        rgbimg.paste(img)
        if size is not None:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            rgbimg = rgbimg.resize(size, Image.LANCZOS)
        # np.array() on a PIL image yields (height, width, 3); add the batch
        # axis explicitly instead of reshaping with (width, height) swapped.
        result.append(np.array(rgbimg)[np.newaxis])
        filenames.append(filename)
    return np.concatenate(result), filenames
Example #5
 def read_sp_manifest_file(path):
     # Must use forward slashes, not os.path.sep.
     fn_manifest = _tarfile_path_join(path, project.Job.FN_MANIFEST)
     try:
         with closing(tarfile.extractfile(fn_manifest)) as file:
             return json.loads(file.read())
     except KeyError:
         pass
Example #6
 def __init__(self, tarfile, file):
     self.fh = tarfile.extractfile(file)
     # self.hsh = hashlib.new('ripemd160')
     self.hsh = hashlib.md5()
     self.data = self.fh.read(100 * 1024)
     while self.data:
         self.hsh.update(self.data)
         self.data = self.fh.read(100 * 1024)
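The same chunked-hashing pattern works as a standalone helper; a minimal sketch, assuming the member name passed in exists in the archive:

import hashlib

def md5_of_member(archive, name, chunk_size=100 * 1024):
    # Stream the member through the hash in fixed-size chunks so the
    # whole file never has to be held in memory at once.
    fh = archive.extractfile(name)
    hsh = hashlib.md5()
    for chunk in iter(lambda: fh.read(chunk_size), b''):
        hsh.update(chunk)
    return hsh.hexdigest()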
Example #7
 def read_sp_manifest_file(path):
     fn_manifest = os.path.join(path, project.Job.FN_MANIFEST)
     try:
         with closing(tarfile.extractfile(fn_manifest)) as file:
             if sys.version_info < (3, 6):
                 return json.loads(file.read().decode())
             else:
                 return json.loads(file.read())
     except KeyError:
         pass
Example #8
 def __getPostScript(self, tarfile, key, package):
     """
     Writes a post-install script to the file system and makes it ready to
     be executed.
     """
     scriptname = '/tmp/%s-%s' % (package, key.split('/')[-1])
     # extractfile() returns bytes, so write the script in binary mode.
     with open(scriptname, "wb") as fh:
         fh.write(tarfile.extractfile(key).read())
     os.chmod(scriptname, 0o700)
     return scriptname
Example #9
def extract_icon(tarfile, iconName, newIconName):
  extractName = iconName
  if iconName.startswith('/'):
    extractName = iconName[1:]
  try:
    iconFile = tarfile.extractfile(extractName)
    # extractfile() returns bytes, so write the icon in binary mode.
    with open(newIconName, "wb") as outicon:
      outicon.write(iconFile.read())
    iconFile.close()
    logging.debug("wrote iconfile '%s' (from '%s')",
                  os.path.basename(newIconName), iconName)
    return True
  except Exception as e:
    # we may sometimes get very confusing errors from tarfile here
    # (like 'filename None not found' from xmms) - this usually means
    # something is strange in the tarball, e.g. xmms.xpm is a symlink to
    # the (non-existent) xmms_mini.xpm
    logging.error("ERROR: Icon '%s' could not be obtained: %s", iconName, e)
    return False
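A hedged usage sketch for extract_icon; the archive and icon paths are placeholders:

import logging
import tarfile as tf

with tf.open("app.tar.gz", "r:gz") as archive:  # placeholder archive
    ok = extract_icon(archive, "/usr/share/pixmaps/app.xpm", "/tmp/app.xpm")
    logging.info("icon extracted: %s", ok)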
Example #10
def extract_icon(tarfile, iconName, newIconName):
    extractName = iconName
    if iconName.startswith('/'):
        extractName = iconName[1:]
    try:
        iconFile = tarfile.extractfile(extractName)
        # extractfile() returns bytes, so write the icon in binary mode.
        with open(newIconName, "wb") as outicon:
            outicon.write(iconFile.read())
        iconFile.close()
        logging.debug("wrote iconfile '%s' (from '%s')",
                      os.path.basename(newIconName), iconName)
        return True
    except Exception as e:
        # we may sometimes get very confusing errors from tarfile here
        # (like 'filename None not found' from xmms) - this usually means
        # something is strange in the tarball, e.g. xmms.xpm is a symlink to
        # the (non-existent) xmms_mini.xpm
        logging.error("ERROR: Icon '%s' could not be obtained: %s",
                      iconName, e)
        return False
Example #11
 def action(self, tarfile, tarinfo):
     if tarinfo.isreg():
         return file.FileAction(tarfile.extractfile(tarinfo),
             mode=oct(stat.S_IMODE(tarinfo.mode)),
             owner=tarinfo.uname, group=tarinfo.gname,
             path=tarinfo.name,
             timestamp=misc.time_to_timestamp(tarinfo.mtime))
     elif tarinfo.isdir():
         return directory.DirectoryAction(
             mode=oct(stat.S_IMODE(tarinfo.mode)),
             owner=tarinfo.uname, group=tarinfo.gname,
             path=tarinfo.name)
     elif tarinfo.issym():
         return link.LinkAction(path=tarinfo.name,
             target=tarinfo.linkname)
     elif tarinfo.islnk():
         return hardlink.HardLinkAction(path=tarinfo.name,
             target=tarinfo.linkname)
     else:
         return unknown.UnknownAction(path=tarinfo.name)
Example #12
 def action(self, tarfile, tarinfo):
     if tarinfo.isreg():
         return file.FileAction(tarfile.extractfile(tarinfo),
             mode=oct(stat.S_IMODE(tarinfo.mode)),
             owner=tarinfo.uname, group=tarinfo.gname,
             path=tarinfo.name,
             timestamp=misc.time_to_timestamp(tarinfo.mtime))
     elif tarinfo.isdir():
         return directory.DirectoryAction(
             mode=oct(stat.S_IMODE(tarinfo.mode)),
             owner=tarinfo.uname, group=tarinfo.gname,
             path=tarinfo.name)
     elif tarinfo.issym():
         return link.LinkAction(path=tarinfo.name,
             target=tarinfo.linkname)
     elif tarinfo.islnk():
         return hardlink.HardLinkAction(path=tarinfo.name,
             target=tarinfo.linkname)
     else:
         return unknown.UnknownAction(path=tarinfo.name)
Example #13
    def read_sp_manifest_file(path):
        """Read state point from the manifest file.

        Parameters
        ----------
        path : str
            Path to manifest file.

        Returns
        -------
        dict
            state point.

        """
        # Must use forward slashes, not os.path.sep.
        fn_manifest = _tarfile_path_join(path, project.Job.FN_MANIFEST)
        try:
            with closing(tarfile.extractfile(fn_manifest)) as file:
                return json.loads(file.read())
        except KeyError:
            pass
Example #14
def stream_read_file(tarfile: Any, path: str, max_size: int) -> bytes:
    """
    Instead of reading everything in one go, which is vulnerable to
    zip bombs, stream and accumulate the bytes.
    :param tarfile: the archive to read from
    :param path: path to the file to read in the tar file
    :param max_size: maximum allowed size in bytes
    :raises MaxFileSizeExceeded: if the maximum size was exceeded
    :return: the file contents as bytes
    """
    file = tarfile.extractfile(path)

    size = 0
    chunks = []
    while True:
        chunk = file.read(1024)
        if not chunk:
            break
        # Count the bytes actually read so that a file of exactly
        # max_size bytes is not rejected.
        size += len(chunk)
        if size > max_size:
            msg = 'file %s was bigger than allowed %i bytes' % (path, max_size)
            raise MaxFileSizeExceeded(msg)
        chunks.append(chunk)
    # Join once at the end instead of repeated bytes concatenation.
    return b''.join(chunks)
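A usage sketch; MaxFileSizeExceeded is the caller-defined exception named in the docstring, and the archive and member paths are placeholders:

import tarfile as tf

class MaxFileSizeExceeded(Exception):
    pass

with tf.open("upload.tar.gz", "r:gz") as archive:  # placeholder
    try:
        data = stream_read_file(archive, "appinfo/info.xml", 512 * 1024)
    except MaxFileSizeExceeded as e:
        print("rejected:", e)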
Example #15
def stream_read_file(tarfile: Any, path: str, max_size: int) -> bytes:
    """
    Instead of reading everything in one go, which is vulnerable to
    zip bombs, stream and accumulate the bytes.
    :param tarfile: the archive to read from
    :param path: path to the file to read in the tar file
    :param max_size: maximum allowed size in bytes
    :raises MaxFileSizeExceeded: if the maximum size was exceeded
    :return: the file contents as bytes
    """
    file = tarfile.extractfile(path)

    size = 0
    chunks = []
    while True:
        chunk = file.read(1024)
        if not chunk:
            break
        # Count the bytes actually read so that a file of exactly
        # max_size bytes is not rejected.
        size += len(chunk)
        if size > max_size:
            msg = 'file %s was bigger than allowed %i bytes' % (path, max_size)
            raise MaxFileSizeExceeded(msg)
        chunks.append(chunk)
    # Join once at the end instead of repeated bytes concatenation.
    return b''.join(chunks)
Example #16
 def _get_json_from_tarfile(tarfile, json_name):
     json_file = (tarfile.extractfile(
         tarfile.getmember(json_name)).read().decode("utf8"))
     return json.loads(json_file)
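For example (archive and member names are placeholders):

import tarfile as tf

with tf.open("model.tar") as archive:
    config = _get_json_from_tarfile(archive, "config.json")
    print(config.get("version"))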
Example #17
    def _package_chart(self, tarfile, version=None, **kwargs):
        '''Internal Helper

        Internal method to make it easier to handle closing
        the tarfile passed here automatically on exit.
        '''
        def get_data(filename):
            membername = os.path.join(self.name, filename)
            yaml = tarfile.extractfile(membername)
            return membername, ruamel.yaml.load(
                yaml, Loader=ruamel.yaml.RoundTripLoader)

        chart_file, chart_data = get_data('Chart.yaml')
        chart_data['version'] = version

        values_file, values_data = get_data('values.yaml')
        values = self.data.get('values', None)
        if values:
            # TODO(kerrin) expand the amount of data available
            # for users to control
            data = {
                'version': version,
                'name': self.name,
            }
            data.update(kwargs)

            def expand_values(source, expanded):
                for key, value in source.items():
                    if isinstance(value, dict):
                        try:
                            expand_values(value, expanded[key])
                        except KeyError as e:
                            raise windlass.exc.MissingEntryInChartValues(
                                expected_source=source,
                                missing_key=e.args[0],
                                values_filename=values_file,
                                chart_name=self.name)
                    else:
                        newvalue = value.format(**data)
                        expanded[key] = newvalue

            # Update by reference the values_data dictionary based on
            # the format of the supplied values field.
            expand_values(values, values_data)

        with tempfile.NamedTemporaryFile() as tmp_file:
            with tarfile.open(tmp_file.name, 'w:gz') as out:
                for member in tarfile.getmembers():
                    if member.name == chart_file:
                        # Override the size of the file
                        datastr = ruamel.yaml.dump(
                            chart_data, Dumper=ruamel.yaml.RoundTripDumper)
                        databytes = datastr.encode('utf-8')
                        member.size = len(databytes)
                        out.addfile(member, io.BytesIO(databytes))
                    elif member.name == values_file:
                        # Override the size of the file
                        datastr = ruamel.yaml.dump(
                            values_data, Dumper=ruamel.yaml.RoundTripDumper)
                        databytes = datastr.encode('utf-8')
                        member.size = len(databytes)
                        out.addfile(member, io.BytesIO(databytes))
                    else:
                        out.addfile(member, tarfile.extractfile(member.name))

            with open(tmp_file.name, 'rb') as fp:
                return fp.read()
Example #18
def main():
    logging.info('Beginning cron job')
    conn = sqlite3.connect(conf.DSN)
    c = conn.cursor()

    release_cache = DBCache(conn.cursor(), 'releases', 'name')
    package_cache = DBCache(conn.cursor(), 'packages', 'name')
    locale_cache = DBCache(conn.cursor(), 'locales', 'name')
    section_cache = DBCache(conn.cursor(), 'sections', 'section')
    for release, package in iter_packages():
        release_id = release_cache[release]
        package_id = package_cache[package['Package']]

        package_path = conf.MIRROR + '/' + package['Filename']
        if not os.path.exists(package_path):
            logging.error('File not found for package {0} ({1})'\
                .format(package['Package'], package['Filename']))
            continue

        try:
            tarfile = get_tarfile(package_path)
        except CorruptArchiveException:
            continue
        for tarinfo in tarfile:
            match = MAN_REGEX.search(tarinfo.name)
            simple_match = SIMPLE_MAN_REGEX.search(tarinfo.name)
            if simple_match and not match:
                logging.info('Simple regex matched line but fancy didn\'t: '
                             '{0} in {1}'.format(tarinfo.name, package_path))
            if not match:
                continue

            section = match.group('section') + \
                match.group('extrasection')
            section_id = section_cache[section]

            name = match.group('manpage')
            if '/' in name:
                logging.error('Invalid manpage name in package {0}.'\
                    .format(package['Package']))
                continue

            if match.group('locale'):
                # strip leading /
                locale = match.group('locale')[1:]
            else:
                locale = 'DEFAULT_LOCALE'
            locale_id = locale_cache[locale]

            if tarinfo.issym():
                target = os.path.dirname(tarinfo.name)
                target = target + '/' + tarinfo.linkname
                target = './' + os.path.normpath(target)

                target_match = MAN_REGEX.search(target)
                if not target_match:
                    logging.error('The symlink for {0} in {1} is really '
                                  'broken.'.format(tarinfo.name, package_path))
                    continue

                if target_match.group('locale'):
                    target_locale = target_match.group('locale')[1:]
                else:
                    target_locale = 'DEFAULT_LOCALE'

                c.execute(
                    """INSERT INTO symlinks
                (link_release, link_section, link_name, link_locale,
                 target_release, target_section, target_name, target_locale)
                VALUES
                (?, ?, ?, ?, ?, ?, ?, ?)""",
                    (release_id, section_id, name, locale_id, release_id,
                     section_cache[target_match.group('section') +
                                   target_match.group('extrasection')],
                     target_match.group('manpage'),
                     locale_cache[target_locale]))
                continue

            try:
                contents = tarfile.extractfile(tarinfo.name)
            except KeyError:
                logging.error('Unable to find file {0} in {1}, possibly a '
                    'symlink to something in another package.'\
                    .format(tarinfo.name, package_path))
                continue
            if contents is None:
                logging.error('Didn\'t find {0} in {1}'.format(
                    tarinfo.name, package_path))
                continue
            contents = contents.read()

            try:
                apropos = get_apropos(contents, name, locale)
            except AproposException as e:
                #logging.info('Apropos error {0} for {1} ({2})'\
                #    .format(e.args, package['Package'], line.strip()))
                apropos = None

            if conf.COPY_MANPAGES:
                # cache the troff file and save its path
                cache_dir = get_path(release, package['Package'],
                                     package['Version'], locale, section)
                path = cache_dir + '/' + name + '.gz'
                with open(path, 'wb') as fd:
                    fd.write(contents)
            else:
                # save the deb
                path = package['Filename']
            try:
                c.execute(
                    'INSERT INTO manpages '
                    '(id, release, section, package, name, path, version, '
                    'locale) VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)',
                    (release_id, section_id, package_id, name, path,
                     package['Version'], locale_id))
                manpage_id = c.lastrowid
            except sqlite3.IntegrityError as e:
                logging.error('Duplicate primary key: '
                              '(release: {0}, section: {1}, package: {2}, '
                              'name: {3}, locale: {4})'.format(
                                  release, section_id, package['Package'],
                                  name, locale))
                continue

            c.execute('INSERT INTO aproposes (docid, apropos) VALUES (?, ?)',
                      (manpage_id, apropos))
    conn.commit()
    conn.close()
Example #19
 def get_data(filename):
     membername = os.path.join(self.name, filename)
     yaml = tarfile.extractfile(membername)
     return membername, ruamel.yaml.load(
         yaml, Loader=ruamel.yaml.RoundTripLoader)
Example #20
def get_attribute_from_tarfile(attribute, tarfile):
    return tarfile.extractfile(attribute).read().strip()
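Note that extractfile().read() returns bytes, so the stripped value is bytes as well; a sketch that decodes it, with placeholder archive and member names:

import tarfile as tf

with tf.open("meta.tar") as archive:
    version = get_attribute_from_tarfile("VERSION", archive).decode("utf-8")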
Example #21
def commit_logs(b, wd, *args, **kwargs):
    opts = kwargs.get('opts')
    push = kwargs.get('push', True)
    alt_year_month = kwargs.get('year_month')
    tarfile = kwargs.get('tarfile')
    tarballname = kwargs.get('tarballname')
    osver = kwargs.get('osver')
    allowed_fields = opts.allowed_fields if opts is not None and \
        hasattr(opts, 'allowed_fields') else []

    # for error reporting:
    testdir = kwargs.get('testdir')
    if tarfile is not None and testdir is None: testdir = '<tarfile>'
    # TODOXXX also extract datestamp for bunsen-push upload

    # XXX tmpdir is required for unxzing
    tmpdir = kwargs.get('tmpdir')
    tmpdir_created = False
    if tmpdir is None:
        tmpdir_created = True
        tmpdir = tempfile.mkdtemp()

    # flatten list of args to list of (path, OPTIONAL tarfile.TarInfo)
    logfiles = flatten_logfiles(args)

    for logfile, tarinfo in logfiles:
        if logfile == 'BUNSEN_COMMIT': continue # don't add to commit
        if logfile == 'year_month.txt': continue # don't add to commit
        if logfile.startswith('index.html'): continue # don't add to commit
        if logfile.startswith('baseline'): continue # don't add to commit
        if logfile.startswith('xfail'): continue # don't add to commit
        if logfile.startswith('previous_'): continue # don't add to commit
        if tarinfo is not None:
            t = tarfile.extractfile(tarinfo)
            logname = os.path.basename(logfile)
            with open(os.path.join(tmpdir, logname), 'wb') as f:
                f.write(t.read()) # TODOXXX read_decode utf-8?
            logpath = os.path.join(tmpdir, logname)
        else:
            logpath = os.path.join(testdir, logfile)
        if os.path.isdir(logpath): continue # don't add to commit
        add_testlog_or_xz(b, tmpdir, logpath)

    testrun = Testrun()
    all_cases = []
    gdb_README = pick_testlog(testdir, tmpdir, 'README.txt')
    gdb_sum = pick_testlog(testdir, tmpdir, 'gdb.sum') # XXX parser autodetects .xz
    gdb_log = pick_testlog(testdir, tmpdir, 'gdb.log') # XXX parser autodetects .xz
    testrun = parse_README(testrun, gdb_README)
    testrun.osver = osver
    testrun = parse_dejagnu_sum(testrun, gdb_sum, all_cases=all_cases)
    testrun = annotate_dejagnu_log(testrun, gdb_log, all_cases, verbose=False)
    for field_name in allowed_fields:
        if hasattr(opts, field_name): # <TODO> opts should support dict operations, here and elsewhere
            testrun[field_name] = getattr(opts, field_name)

    if testrun is None:
        b.reset_all()
        return None # TODOXXX Pass error message?

    b.add_testrun(testrun)

    if testrun.year_month is None and alt_year_month is not None:
        testrun.year_month = alt_year_month
    # TODOXXX handle year_month from tarballname
    if testrun.year_month is None:
        print("WARNING: skipped {} due to missing year_month"\
              .format(testdir))
        b.reset_all()
        return None # TODOXXX Pass error message?

    # XXX To avoid huge working copies, use branch_extra to split testruns branches by source buildbot:
    if 'osver' in testrun:
        commit_id = b.commit(opts.tag, wd=wd, push=False, allow_duplicates=False, extra_label=testrun.osver)
    else:
        # TODOXXX Need to extract osver more diligently for tarfile submissions:
        commit_id = b.commit(opts.tag, wd=wd, push=False, allow_duplicates=False)
    #commit_id = b.commit(opts.tag, wd=wd, push=False, allow_duplicates=True, wd_index=wd_index, wd_testruns=wd_testruns)

    if push:
        wd.push_all()

    if tmpdir_created: shutil.rmtree(tmpdir)

    return commit_id
Example #22
def main():
    logging.info('Beginning cron job')
    conn = sqlite3.connect(conf.DSN)
    c = conn.cursor()

    release_cache = DBCache(conn.cursor(), 'releases', 'name')
    package_cache = DBCache(conn.cursor(), 'packages', 'name')
    locale_cache = DBCache(conn.cursor(), 'locales', 'name')
    section_cache = DBCache(conn.cursor(), 'sections', 'section')
    for release, package in iter_packages():
        release_id = release_cache[release]
        package_id = package_cache[package['Package']]

        package_path = conf.MIRROR + '/' + package['Filename']
        if not os.path.exists(package_path):
            logging.error('File not found for package {0} ({1})'\
                .format(package['Package'], package['Filename']))
            continue

        try:
            tarfile = get_tarfile(package_path)
        except CorruptArchiveException:
            continue
        for tarinfo in tarfile:
            match = MAN_REGEX.search(tarinfo.name)
            simple_match = SIMPLE_MAN_REGEX.search(tarinfo.name)
            if simple_match and not match:
                logging.info('Simple regex matched line but fancy didn\'t: '
                    '{0} in {1}'.format(tarinfo.name, package_path))
            if not match:
                continue

            section = match.group('section') + \
                match.group('extrasection')
            section_id = section_cache[section]

            name = match.group('manpage')
            if '/' in name:
                logging.error('Invalid manpage name in package {0}.'\
                    .format(package['Package']))
                continue

            if match.group('locale'):
                # strip leading /
                locale = match.group('locale')[1:]
            else:
                locale = 'DEFAULT_LOCALE'
            locale_id = locale_cache[locale]

            if tarinfo.issym():
                target = os.path.dirname(tarinfo.name)
                target = target + '/' + tarinfo.linkname
                target = './' + os.path.normpath(target)

                target_match = MAN_REGEX.search(target)
                if not target_match:
                    logging.error('The symlink for {0} in {1} is really '
                        'broken.'.format(tarinfo.name, package_path))
                    continue

                if target_match.group('locale'):
                    target_locale = target_match.group('locale')[1:]
                else:
                    target_locale = 'DEFAULT_LOCALE'

                c.execute("""INSERT INTO symlinks
                (link_release, link_section, link_name, link_locale,
                 target_release, target_section, target_name, target_locale)
                VALUES
                (?, ?, ?, ?, ?, ?, ?, ?)""",
                    (release_id, section_id, name, locale_id,
                     release_id,
                     section_cache[target_match.group('section') +
                        target_match.group('extrasection')],
                     target_match.group('manpage'),
                     locale_cache[target_locale]))
                continue

            try:
                contents = tarfile.extractfile(tarinfo.name)
            except KeyError:
                logging.error('Unable to find file {0} in {1}, possibly a '
                    'symlink to something in another package.'\
                    .format(tarinfo.name, package_path))
                continue
            if contents is None:
                logging.error('Didn\'t find {0} in {1}'.format(tarinfo.name,
                    package_path))
                continue
            contents = contents.read()

            try:
                apropos = get_apropos(contents, name, locale)
            except AproposException as e:
                #logging.info('Apropos error {0} for {1} ({2})'\
                #    .format(e.args, package['Package'], line.strip()))
                apropos = None

            if conf.COPY_MANPAGES:
                # cache the troff file and save its path
                cache_dir = get_path(release, package['Package'],
                    package['Version'], locale, section)
                path = cache_dir + '/' + name + '.gz'
                with open(path, 'wb') as fd:
                    fd.write(contents)
            else:
                # save the deb
                path = package['Filename']
            try:
                c.execute('INSERT INTO manpages '
                    '(id, release, section, package, name, path, version, '
                    'locale) VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)',
                    (release_id, section_id, package_id, name, path,
                    package['Version'], locale_id))
                manpage_id = c.lastrowid
            except sqlite3.IntegrityError as e:
                logging.error('Duplicate primary key: '
                    '(release: {0}, section: {1}, package: {2}, '
                    'name: {3}, locale: {4})'.format(release, section_id,
                    package['Package'], name, locale))
                continue

            c.execute('INSERT INTO aproposes (docid, apropos) VALUES (?, ?)',
                (manpage_id, apropos))
    conn.commit()
    conn.close()
Example #23
    print(datetime.now().strftime("%H:%M:%S>"),
          "created output directory called " + args.output_dir)
    os.makedirs(args.output_dir)

output_dir = args.output_dir + "/filtered_matrices_mex/hg19"

# %% Combine Inputs
combined_matrix = None
combined_barcodes = pd.DataFrame()
celltype_label = []

for filepath in glob.iglob(args.input_dir + "/*.tar.gz"):
    print(datetime.now().strftime("%H:%M:%S>"),
          "unpacking " + filepath[25:] + "...")
    # Bind the archive under a new name so the tarfile module itself is
    # not shadowed on later loop iterations.
    tar = tarfile.open(filepath, "r:gz")
    mtx_file = tar.extractfile("filtered_matrices_mex/hg19/matrix.mtx")

    current_label = filepath[
        25:filepath.find("_filtered_gene_bc_matrices.tar.gz")]
    current_matrix = scipy.io.mmread(mtx_file)
    combined_matrix = scipy.sparse.hstack((combined_matrix, current_matrix))

    # also export the gene files. (will be overwritten each round but whatev)
    genes_file = tar.extractfile("filtered_matrices_mex/hg19/genes.tsv")
    genes = pd.read_csv(genes_file, header=None, sep="\t")

    # export the cell barcodes
    barcodes_file = tar.extractfile(
        "filtered_matrices_mex/hg19/barcodes.tsv")
    barcodes = pd.read_csv(barcodes_file, header=None)
    # DataFrame.append was removed in pandas 2.0; concatenate instead.
    combined_barcodes = pd.concat([combined_barcodes, barcodes])