Example #1
def _process_data(repo, uncompressed_pack, progress):
    logger.info('Dirtying objects for %s' % repo)
    type_mapper = {}
    for obj in uncompressed_pack.iterobjects():
        type_mapper[obj.id] = obj._type
        dirty = _objectify(id=obj.id, type=obj._type)
        dirty.mark_dirty(True)
        dirty.add_repository(repo)
        dirty.save()
    logger.info('Constructed object type map of size %s (%d bytes) for %s' %
                (len(type_mapper), sys.getsizeof(type_mapper), repo))
    models.flush()

    logger.info('Now processing objects for %s' % repo)
    for obj in uncompressed_pack.iterobjects():
        _process_object(repo=repo,
                        obj=obj,
                        progress=progress,
                        type_mapper=type_mapper)

    logger.info('Cleaning objects for %s' % repo)
    for obj_id, obj_type in type_mapper.items():
        dirty = _objectify(id=obj_id, type=obj_type)
        dirty.mark_dirty(False)
        dirty.save()
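
The `_objectify` helper isn't shown in this example. A minimal sketch of what it could look like, assuming the `models` layer exposes one class per git object type with a `get_or_create` classmethod (as `models.Repository.get_or_create` in Example #6 suggests); the class names and the mapping below are assumptions, not anygit's actual code:

def _objectify(id, type):
    # Hypothetical: dispatch on the git object type and fetch or create
    # the matching model record.  The parameter names shadow builtins
    # on purpose, to match the keyword arguments used by the callers
    # above (_objectify(id=..., type=...)).
    mapping = {'blob': models.Blob,
               'tree': models.Tree,
               'commit': models.Commit,
               'tag': models.Tag}
    return mapping[type].get_or_create(id=id)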
Example #2
    def do_request(self):
        urls = request.params.get("url", "").strip()
        if not urls:
            helpers.error("You did not provide a URL.")

        for url in urls.split("\n"):
            url = models.Repository.canonicalize(url.strip())
            if not url:
                continue

            if models.Repository.exists(url=url):
                repo = models.Repository.get_by_attributes(url=url)
                if repo.approved:
                    helpers.flash("Someone has already requested indexing of %s, " "so no worries." % url)
                else:
                    if not url.startswith("git://"):
                        helpers.flash(
                            "That repo (%s) has been already requested.  At the "
                            "moment, anygit only supports git protocol (git://) "
                            "repositories.  Once we've added support for this "
                            "repo's protocol, we'll index it." % url
                        )
                    elif not fetch.check_validity(repo):
                        helpers.error(
                            "That's odd... someone already asked for %s, but it looks "
                            "to us like we can't talk to that repo.  Is there a typo "
                            "in there?  If not, please email [email protected]." % url
                        )
                    else:
                        repo.approved = True
                        repo.save()
                        helpers.flash(
                            "Someone had requested %s before but it was down then. "
                            "Looks like it's back up now.  We'll get right to it." % url
                        )
            else:
                repo = models.Repository.create(url=url)
                if not url.startswith("git://"):
                    helpers.flash(
                        "Successfully requested %s for future indexing.  However, "
                        "please note that only git protocol (git://) "
                        "repositories are currently supported by anygit." % url
                    )
                # Make sure we can talk to it
                elif not fetch.check_validity(repo):
                    helpers.error("Could not talk to %s; are you sure it's a valid URL?" % url)
                else:
                    repo.approved = True
                    repo.save()
                    helpers.flash("Successfully requested %s for indexing." % url)

        models.flush()
        redirect_to("/")
Example #3
def fetch_and_index(repo, recover_mode=False, packfile=None, batch=None, unpack=False):
    check_for_die_file()
    if isinstance(repo, str):
        repo = models.Repository.get(repo)
    repo.refresh()
    # There's a race condition here where two indexing processes might
    # try to index the same repo.  Since indexing is idempotent, that's
    # not harmful beyond wasting resources, but we check here anyway to
    # minimize the damage.
    if repo.indexing:
        logger.error('Repo is already being indexed')
        return
    logger.info('Beginning to index: %s' % repo)
    now = datetime.datetime.now()
    data_path = None

    try:
        # Don't let other people try to index in parallel
        repo.indexing = True
        repo.dirty = True
        repo.save()
        models.flush()
        state = {}
        while True:
            data_path = fetch(repo, recover_mode=recover_mode,
                              packfile=packfile, batch=batch, state=state)
            index_data(data_path, repo, is_path=True, unpack=unpack)
            if not state.get('has_extra'):
                break
            else:
                logger.info('Still more remote heads, running again...')
        repo.count = repo.count_objects()
        repo.last_index = now
        repo.been_indexed = True
        repo.approved = True
        repo.dirty = False
        # Finally, clobber the old remote heads.
        repo.set_remote_heads(repo.new_remote_heads)
        repo.set_new_remote_heads([])
        repo.save()
        refresh_all_counts(all=False)
    except DeadRepo:
        logger.error('Marking %s as dead' % repo)
        repo.approved = False
        repo.save()
    except KeyboardInterrupt:
        logger.info('^C pushed; exiting thread')
        raise
    except Exception:
        logger.error('Had a problem indexing %s: %s' % (repo, traceback.format_exc()))
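
This snippet ends inside the exception handlers; Example #5 shows the `finally` block that releases the `indexing` flag and removes the temporary pack file. The `DeadRepo` and `DieFile` exceptions and the die-file check aren't shown either. A plausible sketch, in which the class docstrings, the file path, and the mechanism are all assumptions (the module evidently imports `os` already, given `os.unlink` in Example #5):

class DeadRepo(Exception):
    """Raised by fetch when the remote repository can't be reached."""

class DieFile(Exception):
    """Raised to ask indexing workers to shut down cleanly."""

def check_for_die_file(path='/tmp/anygit-die'):
    # Hypothetical: an operator touches this file to stop all indexing.
    if os.path.exists(path):
        raise DieFile('Die file present at %s' % path)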
Example #4
def refresh_all_counts(all=None):
    aggregator = models.Aggregate.get()
    aggregator.refresh_all_counts(all=all)
    aggregator.save()
    models.flush()
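
`refresh_all_counts` is a thin wrapper around what looks like a singleton `Aggregate` record. Example #3 calls it with `all=False` after each repository is indexed; passing `all=True` presumably forces a full recount, so usage might look like:

# Incremental refresh, as fetch_and_index does after each repo:
refresh_all_counts(all=False)

# Presumed full recount for a scheduled maintenance job:
refresh_all_counts(all=True)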
Example #5
        repo.been_indexed = True
        # Finally, clobber the old remote heads.
        repo.set_remote_heads(repo.new_remote_heads)
        repo.set_new_remote_heads([])
        repo.save()
    except Exception:
        logger.error('Had a problem: %s' % traceback.format_exc())
    finally:
        if not packfile and data_path:
            try:
                os.unlink(data_path)
            except OSError as e:
                logger.error('Could not remove tmpfile %s: %s' % (data_path, e))
        repo.indexing = False
        repo.save()
        models.flush()
    logger.info('Done with %s' % repo)

def fetch_and_index_threaded(repo):
    models.setup()
    try:
        return fetch_and_index(repo)
    except DieFile:
        # TODO: do something to terminate the controller process too
        sys.exit(1)
    except:
        logger.error(traceback.format_exc())
        raise

def index_all(last_index=None, threads=1):
    repos = list(models.Repository.get_indexed_before(last_index))
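
`index_all` is cut off right after collecting the repositories. A plausible continuation, written here as a separate sketch rather than anygit's actual code: fan the repos out over a small thread pool, since `fetch_and_index_threaded` already calls `models.setup()` to give each worker its own connection.

from concurrent.futures import ThreadPoolExecutor

def index_all_sketch(last_index=None, threads=1):
    # Hypothetical completion of index_all under the assumptions above.
    repos = list(models.Repository.get_indexed_before(last_index))
    with ThreadPoolExecutor(max_workers=threads) as pool:
        # map() yields lazily; drain it so worker exceptions surface here.
        list(pool.map(fetch_and_index_threaded, repos))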
Example #6
def create(url):
    canonical_url = models.Repository.canonicalize(url)
    r = models.Repository.get_or_create(url=canonical_url)
    r.approved = 'spidered'
    r.save()
    models.flush()
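
This entry point looks like it's meant for a crawler: `approved = 'spidered'` presumably distinguishes spider-discovered repositories from user requests, which set `approved = True` in Examples #2 and #3. Usage is then just:

create('git://example.com/project.git')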