Example no. 1
  def update_pages(self):
    bower = Content.get_by_id('bower', parent=self.version_key)
    if bower is None:
      return

    bower_json = bower.get_json()

    for _, path in bower_json.get('pages', {}).iteritems():
      response = util.github_get('repos', self.owner, self.repo, 'contents/' + path, params={'ref': self.sha})

      if response.status_code == 200:
        response_json = json.loads(response.content)
        markdown = None
        # Ensure a file was returned
        if isinstance(response_json, dict) and response_json.get('type') == 'file':
          markdown = base64.b64decode(response_json.get('content'))
      elif response.status_code == 404:
        markdown = None
      else:
        return self.retry('error fetching page %s (%d)' % (path, response.status_code))

      if markdown is not None:
        response = util.github_markdown(markdown)
        if response.status_code == 200:
          Content(parent=self.version_key, id='page-' + path, content=response.content,
                  status=Status.ready, etag=response.headers.get('ETag', None)).put()
        else:
          return self.retry('error rendering markdown for page %s (%d)' % (path, response.status_code))
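
Note: every snippet in this listing reads or writes a Content entity stored as a child of a Version key. The project's actual model definition is not part of the listing; the following is a minimal reconstruction inferred from the calls above (get_by_id ids like 'bower' and 'analysis', the get_json/set_json helpers, and the content/status/error/etag fields), so property types and Status values are assumptions.

  import json

  from google.appengine.ext import ndb

  class Status(object):
    # String values inferred from the status='ready' literals in the tests below.
    pending = 'pending'
    ready = 'ready'
    error = 'error'

  class Content(ndb.Model):
    # Reconstructed sketch; not the project's real model definition.
    content = ndb.TextProperty()
    status = ndb.StringProperty()
    error = ndb.StringProperty()
    etag = ndb.StringProperty()

    def get_json(self):
      return json.loads(self.content) if self.content else None

    def set_json(self, data):
      self.content = json.dumps(data) if data is not None else None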
Example no. 2
    def test_analyze_leaves_existing_content_when_reanalyzing(self):
        library_key = Library(id='owner/repo').put()
        version_key = Version(id='v1.1.1',
                              parent=library_key,
                              sha='sha',
                              status='ready').put()

        content = Content(id='analysis',
                          parent=version_key,
                          status=Status.pending)
        content.content = 'existing data'
        content.status = Status.ready
        content.put()

        response = self.app.get('/task/analyze/owner/repo',
                                headers={'X-AppEngine-QueueName': 'default'})
        self.assertEqual(response.status_int, 200)

        content = Content.get_by_id('analysis', parent=version_key)
        self.assertEqual(content.content, 'existing data')
        self.assertEqual(content.status, Status.ready)

        tasks = self.tasks.get_filtered_tasks()
        self.assertEqual([
            util.ingest_analysis_task('owner', 'repo', 'v1.1.1'),
        ], [task.url for task in tasks])
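
Note: this test (and the others below) depends on a base class that is not part of the listing. A plausible harness, sketched with the App Engine testbed and webtest; the empty WSGI application is a stand-in for the project's real handler wiring.

  import unittest

  import webapp2
  import webtest
  from google.appengine.ext import testbed

  application = webapp2.WSGIApplication([])  # stand-in; the real app mounts the /task/... handlers

  class TestBase(unittest.TestCase):
    def setUp(self):
      self.testbed = testbed.Testbed()
      self.testbed.activate()
      self.testbed.init_datastore_v3_stub()
      self.testbed.init_memcache_stub()
      self.testbed.init_taskqueue_stub(root_path='.')
      # get_filtered_tasks() used in the assertions comes from this stub.
      self.tasks = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
      self.app = webtest.TestApp(application)

    def tearDown(self):
      self.testbed.deactivate()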
Example no. 3
    def handle_post(self):
        message_json = json.loads(
            urllib.unquote(self.request.body).rstrip('='))
        message = message_json['message']
        data = base64.b64decode(str(message['data']))
        attributes = message['attributes']
        owner = attributes['owner']
        repo = attributes['repo']
        version = attributes['version']
        error = attributes.get('error', None)

        version_key = ndb.Key(Library, Library.id(owner, repo), Version,
                              version)

        content = Content.get_by_id('analysis', parent=version_key)
        if content is None:
            return
        if data == '':
            content.set_json(None)
        else:
            content.set_json(json.loads(data))

        if error is None:
            content.status = Status.ready
            content.error = None
        else:
            content.status = Status.error
            content.error = error

        content.put()

        if version_key.id() == Library.default_version_for_key_async(
                version_key.parent()).get_result():
            task_url = util.update_indexes_task(owner, repo)
            util.new_task(task_url, target='manage')
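
Note: handle_post parses a Cloud Pub/Sub push message: a JSON body whose message.data carries base64-encoded analysis output and whose message.attributes identify the library version. A sketch of constructing such a body for a test, mirroring the url-unquoting the handler performs; the attribute values are made up.

  import base64
  import json
  import urllib

  payload = {
      'message': {
          'data': base64.b64encode(json.dumps({'elementsByTagName': {}})),
          'attributes': {'owner': 'owner', 'repo': 'repo', 'version': 'v1.1.1'},
      },
  }
  body = urllib.quote(json.dumps(payload))
  # POSTing this body to the handler marks the version's 'analysis' Content ready.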
Example no. 4
  def update_search_index(self, owner, repo, version_key, library, bower):
    metadata = json.loads(library.metadata)
    registry_metadata = json.loads(library.registry_metadata) if library.registry_metadata else None
    npm_description = registry_metadata.get('description', '') if registry_metadata else ''
    npm_keywords = registry_metadata.get('keywords', []) if registry_metadata else []
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) + util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]

    # Generate weighting field
    weights = [(repo, 10)]

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      data = analysis.get_json()
      if data.get('analyzerData', None) is not None:
        # Use analyzer data for search index
        element_objects = data.get('analyzerData', {}).get('elements', [])
        elements = [element.get('tagname', '') or element.get('classname', '') for element in element_objects]
        if elements != []:
          fields.append(search.TextField(name='element', value=' '.join(elements)))
          weights.append((' '.join(elements), 5))

        behavior_objects = data.get('analyzerData', {}).get('metadata', {}).get('polymer', {}).get('behaviors', [])
        behaviors = [behavior.get('name', '') for behavior in behavior_objects]
        if behaviors != []:
          fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
          weights.append((' '.join(behaviors), 5))
      else:
        # Use hydrolysis data for search index
        elements = data.get('elementsByTagName', {}).keys()
        if elements != []:
          fields.append(search.TextField(name='element', value=' '.join(elements)))
          weights.append((' '.join(elements), 5))
        behaviors = data.get('behaviorsByName', {}).keys()
        if behaviors != []:
          fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
          weights.append((' '.join(behaviors), 5))

    weighted = []
    for value, weight in weights:
      for _ in range(0, weight):
        weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
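
Note: the weighted_fields trick above works around the App Engine Search API's lack of per-field boosts by repeating the repo name ten times and each joined element/behavior list five times inside a single TextField. A minimal sketch of querying the index this method builds; the query string is illustrative.

  from google.appengine.api import search

  index = search.Index('repo')
  results = index.search(search.Query(
      query_string='paper button',
      options=search.QueryOptions(limit=20)))
  for document in results:
    print document.doc_id  # e.g. 'owner/repo'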
Example no. 5
    def get(self, owner, repo, ver=None):
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        owner = owner.lower()
        repo = repo.lower()
        library_key = ndb.Key(Library, Library.id(owner, repo))
        if ver is None:
            ver = yield Library.latest_version_for_key_async(library_key)
        if ver is None:
            self.response.set_status(404)
            return
        version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
        analysis = Content.get_by_id('analysis',
                                     parent=version_key,
                                     read_policy=ndb.EVENTUAL_CONSISTENCY)

        if analysis is None:
            self.response.set_status(404)
            return

        self.response.headers['Content-Type'] = 'application/json'
        result = {}
        result['status'] = analysis.status
        if analysis.status == Status.ready:
            result['content'] = json.loads(analysis.content)
        if analysis.status == Status.error:
            result['error'] = analysis.error

        if result['status'] != Status.ready:
            self.response.set_status(400)

        self.response.write(json.dumps(result))
Example no. 6
  def get(self, owner, repo, tag, name=None, path=None):
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'

    owner = owner.lower()
    repo = repo.lower()
    version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, tag)

    analysis = Content.get_by_id('analysis', parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if analysis is None:
      self.response.set_status(404)
      return

    dependencies = json.loads(analysis.content).get('bowerDependencies', None)
    if dependencies is None:
      self.response.set_status(404)
      return

    config_map = {}
    for dependency in dependencies:
      config_map[dependency['name']] = '%s/%s/%s' % (dependency['owner'], dependency['repo'], dependency['version'])

    # Ensure the repo serves its own version.
    config_map[repo] = '%s/%s/%s' % (owner, repo, tag)

    def resolve(name, path):
      return 'https://cdn.rawgit.com/%s%s' % (config_map[name], path) if name in config_map else None

    # debug mode
    if name is None or path is None:
      for k in config_map:
        self.response.write('/%s/%s/%s/components/%s/... -> %s\n' % (owner, repo, tag, k, resolve(k, '/...')))
      self.response.write('\n')
      return

    resolved = resolve(name, path)
    if resolved is None:
      self.response.write('%s is not a valid dependency for %s/%s#%s' % (name, owner, repo, tag))
      self.response.set_status(400)
      return

    # TODO: Figure out what other types this is necessary for. eg. do we need it for CSS @import?
    # We need to serve html files from the same origin, so that relative urls load correctly.
    if path.endswith('.html'):
      # TODO: Decide whether this should be memcached. Appengine's urlfetch already does caching.
      response = urlfetch.fetch(resolved)
      if response.status_code == 200:
        self.response.write(response.content)
        self.response.headers['cache-control'] = response.headers.get('cache-control', 'max-age=315569000')
        self.response.headers['content-type'] = response.headers.get('content-type', 'text/html')
      else:
        self.response.write('could not fetch: %s' % resolved)
        self.response.set_status(400)
    else:
      self.response.set_status(301)
      self.response.headers['Location'] = str(resolved)
      self.response.headers['cache-control'] = 'max-age=315569000'
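
Note: this handler rewrites bower dependency names to cdn.rawgit.com URLs and, for HTML, proxies the content instead of redirecting so that relative URLs inside the page resolve against the same origin. An illustration of the mapping with made-up values:

  config_map = {'polymer': 'polymer/polymer/v1.4.0'}  # name -> owner/repo/version

  def resolve(name, path):
    return 'https://cdn.rawgit.com/%s%s' % (config_map[name], path) if name in config_map else None

  assert resolve('polymer', '/polymer.html') == 'https://cdn.rawgit.com/polymer/polymer/v1.4.0/polymer.html'
  assert resolve('unknown', '/x.html') is None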
Example no. 7
    def update_search_index(self, owner, repo, version_key, library, bower):
        metadata = json.loads(library.metadata)
        fields = [
            search.AtomField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.AtomField(name='kind', value=library.kind),
            search.AtomField(name='version', value=version_key.id()),
            search.TextField(name='github_description',
                             value=metadata.get('description', '')),
            search.TextField(name='bower_description',
                             value=bower.get('description', '')),
            search.TextField(name='bower_keywords',
                             value=' '.join(bower.get('keywords', []))),
            search.TextField(
                name='prefix_matches',
                value=' '.join(
                    util.generate_prefixes_from_list(
                        util.safe_split_strip(metadata.get('description')) +
                        util.safe_split_strip(bower.get('description')) +
                        util.safe_split_strip(repo)))),
        ]

        # Generate weighting field
        weights = [(repo, 10)]

        analysis = Content.get_by_id('analysis', parent=version_key)
        if analysis is not None and analysis.status == Status.ready:
            analysis = json.loads(analysis.content)
            elements = analysis.get('elementsByTagName', {}).keys()
            if elements != []:
                fields.append(
                    search.TextField(name='element', value=' '.join(elements)))
                weights.append((' '.join(elements), 5))
            behaviors = analysis.get('behaviorsByName', {}).keys()
            if behaviors != []:
                fields.append(
                    search.TextField(name='behavior',
                                     value=' '.join(behaviors)))
                weights.append((' '.join(behaviors), 5))

        weighted = []
        for value, weight in weights:
            for _ in range(0, weight):
                weighted.append(value)
        fields.append(
            search.TextField(name='weighted_fields', value=' '.join(weighted)))

        rank = int(
            (library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
        document = search.Document(doc_id=Library.id(owner, repo),
                                   fields=fields,
                                   rank=rank)
        index = search.Index('repo')
        index.put(document)
Example no. 8
  def trigger_analysis(self, tag, sha, transactional=False):
    analysis_sha = None
    if self.library.kind == 'collection':
      analysis_sha = sha
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None or content.status == Status.error:
      Content(id='analysis', parent=version_key, status=Status.pending).put()

    task_url = util.ingest_analysis_task(self.scope, self.package, tag, analysis_sha)
    util.new_task(task_url, target='analysis', transactional=transactional, queue_name='analysis')
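
Note: trigger_analysis writes a pending placeholder Content first, so readers observe Status.pending while the analysis task is queued. util.new_task is not shown in this listing; judging from its call sites it is presumably a thin wrapper over the task queue API, roughly:

  from google.appengine.api import taskqueue

  # Hypothetical reconstruction of util.new_task, inferred from call sites only.
  def new_task(url, target=None, transactional=False, queue_name='default'):
    return taskqueue.add(url=url, method='POST', target=target,
                         transactional=transactional, queue_name=queue_name)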
Example no. 9
  def trigger_analysis(self, tag, sha, transactional=False):
    analysis_sha = None
    if self.library.kind == 'collection':
      analysis_sha = sha
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None or content.status == Status.error:
      Content(id='analysis', parent=version_key, status=Status.pending).put()

    task_url = util.ingest_analysis_task(self.owner, self.repo, tag, analysis_sha)
    util.new_task(task_url, target='analysis', transactional=transactional, queue_name='analysis')
Example no. 10
  def test_analyze(self):
    library_key = Library(id='owner/repo').put()
    version_key = Version(id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    response = self.app.get('/task/analyze/owner/repo', headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    content = Content.get_by_id('analysis', parent=version_key)
    self.assertEqual(content.content, None)
    self.assertEqual(content.status, Status.pending)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual([
        util.ingest_analysis_task('owner', 'repo', 'v1.1.1'),
    ], [task.url for task in tasks])
Example no. 11
    def get(self, owner, repo, ver=None):
        use_analyzer_data = self.request.get('use_analyzer_data',
                                             None) is not None
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        owner = owner.lower()
        repo = repo.lower()
        library_key = ndb.Key(Library, Library.id(owner, repo))
        if ver is None:
            ver = yield Library.default_version_for_key_async(library_key)
        if ver is None:
            self.response.set_status(404)
            return
        version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
        analysis = Content.get_by_id('analysis',
                                     parent=version_key,
                                     read_policy=ndb.EVENTUAL_CONSISTENCY)

        if analysis is None:
            self.response.set_status(404)
            return

        self.response.headers['Content-Type'] = 'application/json'
        result = {}
        result['status'] = analysis.status
        if analysis.status == Status.ready:
            content = analysis.get_json()

            has_analyzer_data = content.get('analyzerData', None) is not None

            if use_analyzer_data and has_analyzer_data:
                # Use the analyzer data fields
                result['analysis'] = content['analyzerData']
            else:
                # Use the hydrolysis fields and delete the analyzer ones
                if has_analyzer_data:
                    del content['analyzerData']
                result['content'] = content

        if analysis.status == Status.error:
            result['error'] = analysis.error

        if result['status'] != Status.ready:
            self.response.set_status(400)

        self.response.write(json.dumps(result))
Example no. 12
  def test_analyze_latest(self):
    library_key = Library(id='owner/repo').put()
    Version(id='v1.1.1', parent=library_key, sha='sha', status='ready').put()
    version_key = Version(id='v1.1.2', parent=library_key, sha='sha', status='ready').put()
    VersionCache.update(library_key)

    response = self.app.get('/task/analyze/owner/repo/True', headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    content = Content.get_by_id('analysis', parent=version_key)
    self.assertEqual(content.get_json(), None)
    self.assertEqual(content.status, Status.pending)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual([
        util.ingest_analysis_task('owner', 'repo', 'v1.1.2'),
    ], [task.url for task in tasks])
Example no. 13
  def get(self, owner, repo, ver, path):
    self.response.headers['Access-Control-Allow-Origin'] = '*'

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)

    # An ndb.Key is always constructed, so check for the entity instead.
    if version_key.get() is None:
      self.response.set_status(404)
      self.response.write('Invalid repo/version')
      return

    page = Content.get_by_id('page-' + path, parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)

    if page is None:
      self.response.set_status(404)
      self.response.write('Cannot find page %s' % path)
      return

    self.response.write(page.content)
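
Note: this handler is the read side of Example no. 1: pages rendered there are stored as Content entities with a 'page-' + path id and served back here. A sketch of exercising it with the webtest harness sketched after Example no. 2; the route pattern is a guess, not the project's real URL mapping.

  # Inside a webtest-based test; the route is hypothetical.
  response = self.app.get('/api/docs/owner/repo/v1.0.0/wiki/index.md')
  assert response.status_int == 200
  assert response.body  # the HTML stored by update_pages in Example no. 1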
Example no. 14
File: api.py Project: keanulee/v2
  def get(self, owner, repo, ver=None):
    self.response.headers['Access-Control-Allow-Origin'] = '*'

    owner = owner.lower()
    repo = repo.lower()
    if ver is None:
      # ndb.Key raises on a None id; reject a missing version explicitly.
      self.response.set_status(404)
      return
    version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, ver)

    hydrolyzer = Content.get_by_id('hydrolyzer', parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if hydrolyzer is None:
      self.response.set_status(404)
      return

    dependencies = json.loads(hydrolyzer.content).get('bowerDependencies', None)
    if dependencies is None:
      self.response.set_status(404)
      return

    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(dependencies))
Example no. 15
  def update_search_index(self, owner, repo, version_key, library, bower):
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) + util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]

    # Generate weighting field
    weights = [(repo, 10)]

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      analysis = json.loads(analysis.content)
      elements = analysis.get('elementsByTagName', {}).keys()
      if elements != []:
        fields.append(search.TextField(name='element', value=' '.join(elements)))
        weights.append((' '.join(elements), 5))
      behaviors = analysis.get('behaviorsByName', {}).keys()
      if behaviors != []:
        fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
        weights.append((' '.join(behaviors), 5))

    weighted = []
    for value, weight in weights:
      for _ in range(0, weight):
        weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
Example no. 16
    def update_search_index(self, owner, repo, version_key, library, bower):
        metadata = json.loads(library.metadata)
        fields = [
            search.AtomField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.AtomField(name='kind', value=library.kind),
            search.AtomField(name='version', value=version_key.id()),
            search.TextField(name='github_description',
                             value=metadata.get('description', '')),
            search.TextField(name='bower_description',
                             value=bower.get('description', '')),
            search.TextField(name='bower_keywords',
                             value=' '.join(bower.get('keywords', []))),
            search.TextField(
                name='prefix_matches',
                value=' '.join(
                    util.generate_prefixes_from_list(
                        [repo] + util.safesplit(metadata.get('description')) +
                        util.safesplit(bower.get('description')) +
                        repo.replace("_", " ").replace("-", " ").split()))),
        ]

        analysis = Content.get_by_id('analysis', parent=version_key)
        if analysis is not None and analysis.status == Status.ready:
            analysis = json.loads(analysis.content)
            elements = analysis.get('elementsByTagName', {}).keys()
            if elements != []:
                fields.append(
                    search.TextField(name='element', value=' '.join(elements)))
            behaviors = analysis.get('behaviorsByName', {}).keys()
            if behaviors != []:
                fields.append(
                    search.TextField(name='behavior',
                                     value=' '.join(behaviors)))

        document = search.Document(doc_id=Library.id(owner, repo),
                                   fields=fields)
        index = search.Index('repo')
        index.put(document)
Example no. 17
File: api.py Project: keanulee/v2
  def get(self, owner, repo, ver=None):
    # TODO: Share all of this boilerplate between GetDataMeta and GetHydroData
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    owner = owner.lower()
    repo = repo.lower()
    library_key = ndb.Key(Library, '%s/%s' % (owner, repo))
    # TODO: version shouldn't be optional here
    if ver is None:
      versions = Version.query(ancestor=library_key).map(lambda v: v.key.id())
      versions.sort(versiontag.compare)
      if versions == []:
        self.response.set_status(404)
        return
      ver = versions[-1]
    version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, ver)
    hydro = Content.get_by_id('hydrolyzer', parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if hydro is None:
      self.response.set_status(404)
      return

    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(hydro.content)
Example no. 18
  def handle_post(self):
    # Ignore payloads larger than 5 MB.
    if len(self.request.body) > 1048487 * 5:
      return
    message_json = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = message_json['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    if len(attributes) == 0:
      logging.error(message)
      return
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return
    if data == '':
      content.set_json(None)
    else:
      content.set_json(json.loads(data))

    if error is None:
      content.status = Status.ready
      content.error = None
    else:
      content.status = Status.error
      content.error = error

    content.put()

    if version_key.id() == Library.default_version_for_key_async(version_key.parent()).get_result():
      task_url = util.update_indexes_task(owner, repo)
      util.new_task(task_url, target='manage')
Example no. 19
  def handle_post(self):
    message_json = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = message_json['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return
    if data == '':
      content.content = None
    elif len(data) > 500000:
      # Max entity size is only 1MB.
      logging.error('content was too large: %d %s %s', len(data), Library.id(owner, repo), version)
      error = 'content was too large: %d' % len(data)
    else:
      content.content = data

    if error is None:
      content.status = Status.ready
      content.error = None
    else:
      content.status = Status.error
      content.error = error

    content.put()

    if version_key.id() == Library.default_version_for_key_async(version_key.parent()).get_result():
      task_url = util.update_indexes_task(owner, repo)
      util.new_task(task_url, target='manage')
Example no. 20
  def update_search_index(self, owner, repo, version_key, library, bower):
    metadata = json.loads(library.metadata)
    registry_metadata = json.loads(library.registry_metadata) if library.registry_metadata else None
    npm_description = registry_metadata.get('description', '') if registry_metadata else ''
    npm_keywords = registry_metadata.get('keywords', []) if registry_metadata else []
    fields = [
        search.AtomField(name='owner', value=owner),
        search.AtomField(name='github_owner', value=library.github_owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) + util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]

    # Generate weighting field
    weights = [(repo, 10)]

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      data = analysis.get_json()
      if data.get('analyzerData', None) is not None:
        # Use analyzer data for search index
        element_objects = data.get('analyzerData', {}).get('elements', [])
        elements = [element.get('tagname', '') or element.get('classname', '') for element in element_objects]
        if elements != []:
          fields.append(search.TextField(name='element', value=' '.join(elements)))
          weights.append((' '.join(elements), 5))

        behavior_objects = data.get('analyzerData', {}).get('metadata', {}).get('polymer', {}).get('behaviors', [])
        behaviors = [behavior.get('name', '') for behavior in behavior_objects]
        if behaviors != []:
          fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
          weights.append((' '.join(behaviors), 5))
      else:
        # Use hydrolysis data for search index
        elements = data.get('elementsByTagName', {}).keys()
        if elements != []:
          fields.append(search.TextField(name='element', value=' '.join(elements)))
          weights.append((' '.join(elements), 5))
        behaviors = data.get('behaviorsByName', {}).keys()
        if behaviors != []:
          fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
          weights.append((' '.join(behaviors), 5))

    weighted = []
    for value, weight in weights:
      for _ in range(0, weight):
        weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
Example no. 21
File: api.py Project: keanulee/v2
  def get(self, owner, repo, ver=None):
    owner = owner.lower()
    repo = repo.lower()
    library = Library.get_by_id('%s/%s' % (owner, repo), read_policy=ndb.EVENTUAL_CONSISTENCY)
    if library is None or library.error is not None:
      self.response.write(str(library))
      self.response.set_status(404)
      return
    versions = library.versions()
    if versions == []:
      self.response.set_status(404)
      return
    if ver is None:
      ver = versions[-1]
    version = Version.get_by_id(ver, parent=library.key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if version is None or version.error is not None:
      self.response.write(str(version))
      self.response.set_status(404)
      return
    metadata = json.loads(library.metadata)
    bower_json = {}
    bower = Content.get_by_id('bower', parent=version.key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if bower is not None:
      try:
        bower_json = json.loads(bower.content)
      except ValueError:
        # json.loads raises ValueError on malformed bower.json content.
        bower_json = {}
    readme = Content.get_by_id('readme.html', parent=version.key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    full_name_match = re.match(r'(.*)/(.*)', metadata['full_name'])
    result = {
        'version': ver,
        'versions': versions,
        'readme': None if readme is None else readme.content,
        'subscribers': metadata['subscribers_count'],
        'stars': metadata['stargazers_count'],
        'forks': metadata['forks'],
        'contributors': library.contributor_count,
        'open_issues': metadata['open_issues'],
        'updated_at': metadata['updated_at'],
        'owner': full_name_match.groups()[0],
        'repo': full_name_match.groups()[1],
        'bower': None if bower is None else {
            'description': bower_json.get('description', ''),
            'license': bower_json.get('license', ''),
            'dependencies': bower_json.get('dependencies', []),
            'keywords': bower_json.get('keywords', []),
        },
        'collections': []
    }
    for collection in library.collections:
      if not versiontag.match(ver, collection.semver):
        continue
      collection_version = collection.version.id()
      collection_library = collection.version.parent().get()
      collection_metadata = json.loads(collection_library.metadata)
      collection_name_match = re.match(r'(.*)/(.*)', collection_metadata['full_name'])
      result['collections'].append({
          'owner': collection_name_match.groups()[0],
          'repo': collection_name_match.groups()[1],
          'version': collection_version
      })
    if library.kind == 'collection':
      dependencies = []
      version_futures = []
      for dep in version.dependencies:
        parsed_dep = Dependency.fromString(dep)
        dep_key = ndb.Key(Library, "%s/%s" % (parsed_dep.owner.lower(), parsed_dep.repo.lower()))
        version_futures.append(Library.versions_for_key_async(dep_key))
      for i, dep in enumerate(version.dependencies):
        parsed_dep = Dependency.fromString(dep)
        versions = version_futures[i].get_result()
        versions.reverse()
        while len(versions) > 0 and not versiontag.match(versions[0], parsed_dep.version):
          versions.pop()
        if len(versions) == 0:
          dependencies.append({
              'error': 'unsatisfyable dependency',
              'owner': parsed_dep.owner,
              'repo': parsed_dep.repo,
              'versionSpec': parsed_dep.version
          })
        else:
          dependencies.append(brief_metadata_from_datastore(parsed_dep.owner, parsed_dep.repo, versions[0]))
      result['dependencies'] = dependencies
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result))
Example no. 22
    def get(self, owner, repo, tag, name=None, path=None):
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.headers['Content-Type'] = 'application/json'

        if name is None and path is None:
            self.response.write(
                'Invalid request. Try using a relative path if you are using an absolute path.'
            )
            self.response.set_status(400)
            return

        owner = owner.lower()
        repo = repo.lower()
        version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, tag)

        # path may still be None when only name was supplied; guard before endswith.
        if path is not None and path.endswith('/'):
            path = path + 'index.html'

        analysis = Content.get_by_id('analysis',
                                     parent=version_key,
                                     read_policy=ndb.EVENTUAL_CONSISTENCY)
        if analysis is None or analysis.status != Status.ready:
            self.response.write('could not find analysis for %s in %s/%s' %
                                (tag, owner, repo))
            self.response.set_status(404)
            return

        dependencies = analysis.get_json().get('bowerDependencies', None)
        if dependencies is None:
            self.response.write('could not find dependencies for %s in %s/%s' %
                                (tag, owner, repo))
            self.response.set_status(404)
            return

        config_map = {}
        for dependency in dependencies:
            if dependency['owner'] == owner and dependency['repo'] == repo:
                continue
            config_map[dependency['name']] = '%s/%s/%s' % (
                dependency['owner'], dependency['repo'], dependency['version'])

        # Ensure the repo serves its own version.
        config_map[repo] = '%s/%s/%s' % (owner, repo, tag)

        def resolve(name, path):
            return 'https://cdn.rawgit.com/%s%s' % (
                config_map[name], path) if name in config_map else None

        # debug mode
        if name is None or path is None:
            for k in config_map:
                self.response.write('/%s/%s/%s/components/%s/... -> %s\n' %
                                    (owner, repo, tag, k, resolve(k, '/...')))
            self.response.write('\n')
            return

        resolved = resolve(name, path)
        if resolved is None:
            self.response.write('%s is not a valid dependency for %s/%s#%s' %
                                (name, owner, repo, tag))
            self.response.set_status(400)
            return

        # TODO: Figure out what other types this is necessary for. eg. do we need it for CSS @import?
        # We need to serve html files from the same origin, so that relative urls load correctly.
        if path.endswith('.html'):
            # TODO: Decide whether this should be memcached. Appengine's urlfetch already does caching.
            response = urlfetch.fetch(resolved, validate_certificate=True)
            if response.status_code == 200:
                self.response.write(response.content)
                self.response.headers['cache-control'] = response.headers.get(
                    'cache-control', 'max-age=315569000')
                self.response.headers['content-type'] = response.headers.get(
                    'content-type', 'text/html')
            else:
                self.response.write('could not fetch: %s' % resolved)
                self.response.set_status(400)
        else:
            self.response.set_status(301)
            self.response.headers['Location'] = str(resolved)
            self.response.headers['cache-control'] = 'max-age=315569000'