Beispiel #1
0
def get_text_content(pkg):
    """Collect weighted text snippets describing a package.

    Reads the ``desc``, ``keywords`` and ``readme`` entries of ``pkg`` and
    returns a list of ``(text, weight)`` tuples made of:

    * every declared keyword longer than two characters, weight ``2.0``;
    * every ``(keyword, score)`` pair returned by ``rk.run`` on the
      extracted readme text whose score exceeds 3, weight
      ``1.2 * log(score, 4)``;
    * the description itself when it is 3 to 511 characters long,
      weight ``1.0``.

    Entries that are missing or ``None`` are treated as empty, and a readme
    that is not a text string is ignored.
    """
    # `or` also covers keys that are present but hold None, which would
    # otherwise break len() / iteration further down.
    desc = pkg.get('desc') or ''
    keywords = pkg.get('keywords') or []
    readme = pkg.get('readme') or ''
    # type(u'') is `unicode` on Python 2 and plain `str` on Python 3, so the
    # check does not depend on the Python 2-only `unicode` builtin.
    if not isinstance(readme, (str, type(u''))):
        readme = ''

    readmeText = extractText(readme)
    parsedKeywords = rk.run(readmeText)

    results = [(kw, 2.0) for kw in keywords if len(kw) > 2]
    results.extend(
        (kw, 1.2 * math.log(score, 4))
        for (kw, score) in parsedKeywords
        if score > 3
    )
    if 2 < len(desc) < 512:
        results.append((desc, 1.0))
    return results
Beispiel #2
0
def get_text_content(pkg):
    """Build the list of ``(text, weight)`` pairs used to describe ``pkg``.

    Three sources contribute, in this order:

    1. Declared keywords (``pkg['keywords']``) longer than two characters,
       each with weight ``2.0``.
    2. ``(keyword, score)`` pairs produced by ``rk.run`` from the text
       extracted out of ``pkg['readme']``; only scores above 3 are kept and
       weighted ``1.2 * log(score, 4)``.
    3. The description (``pkg['desc']``) with weight ``1.0``, provided its
       length is strictly between 2 and 512.

    A missing or ``None`` entry counts as empty; a readme that is not a
    text string is discarded.
    """
    # Fall back to an empty value when the key is absent *or* set to None;
    # len() and the keyword loop below cannot handle None.
    desc = pkg.get('desc') or ''
    keywords = pkg.get('keywords') or []
    readme = pkg.get('readme') or ''

    # Accept both text types on Python 2 (type(u'') is `unicode` there)
    # without referencing a name that does not exist on Python 3.
    text_types = (str, type(u''))
    if not isinstance(readme, text_types):
        readme = ''

    parsedKeywords = rk.run(extractText(readme))

    results = []
    for kw in keywords:
        if len(kw) > 2:
            results.append((kw, 2.0))
    for (kw, score) in parsedKeywords:
        if score > 3:
            results.append((kw, 1.2 * math.log(score, 4)))
    if 2 < len(desc) < 512:
        results.append((desc, 1.0))
    return results
Beispiel #3
0
def get_text_content(pkg):
    """Collect weighted text snippets describing a package.

    Returns a list of ``(text, weight)`` tuples built from ``pkg``:

    * declared keywords longer than two characters, weight ``2.0``;
    * ``(keyword, score)`` pairs returned by ``rk.run`` on the extracted
      readme text with a score above 3, weight ``1.2 * log(score, 4)``;
    * the description when it is 3 to 511 characters long, weight ``1.0``.

    Missing or ``None`` entries are treated as empty.  The readme is
    dropped when it is not a text string or when it starts with
    ``'ERROR'``.
    """
    # `or` also covers keys that are present but hold None.
    desc = pkg.get('desc') or ''
    keywords = pkg.get('keywords') or []
    readme = pkg.get('readme') or ''

    # The type has to be checked *before* startswith() is called: values
    # that are not strings have no such method.  type(u'') is `unicode` on
    # Python 2 and `str` on Python 3.
    # NOTE(review): a readme starting with 'ERROR' is presumably a
    # placeholder stored when no readme was available -- confirm against
    # the data source.
    is_text = isinstance(readme, (str, type(u'')))
    if not is_text or readme.startswith('ERROR'):
        debug('No Readme Found')
        readme = ''

    readmeText = extractText(readme)
    debug('readme', readmeText)
    parsedKeywords = rk.run(readmeText)
    debug('rake', parsedKeywords)

    results = [(kw, 2.0) for kw in keywords if len(kw) > 2]
    results.extend(
        (kw, 1.2 * math.log(score, 4))
        for (kw, score) in parsedKeywords
        if score > 3
    )
    if 2 < len(desc) < 512:
        results.append((desc, 1.0))
    return results
Beispiel #4
0
def add_to_db(pkg, es):
    """Index one package as an "implementation" document.

    The document is written to index ``"temp"`` under the id returned by
    ``get_es_id(pkg)``.  Its body carries the package description, the raw
    readme (as install-instruction content), the text extracted from the
    readme, an ``npm install`` command and the popularity computed by
    ``compute_pkg_weight(pkg)``.

    Args:
        pkg: Mapping describing the package.  ``name`` is looked up with
            ``pkg["name"]`` and must be present; ``desc`` and ``readme``
            are optional.
        es: Object whose ``index`` method accepts ``index``, ``doc_type``,
            ``id`` and ``body`` keyword arguments (presumably an
            Elasticsearch client -- confirm against the caller).
    """
    readme = pkg.get("readme", "")
    # Anything that is not a text string (including None) counts as "no
    # readme".  type(u"") is `unicode` on Python 2 and `str` on Python 3, so
    # the check does not rely on the Python 2-only `unicode` builtin.
    if not isinstance(readme, (str, type(u""))):
        readme = ""
    plaintextRM = extractText(readme)

    es.index(
        index="temp",
        doc_type="implementation",
        id=get_es_id(pkg),
        body={
            "language": "JavaScript",
            "algorithm": [],
            "source": "npm",
            "description": pkg.get("desc", ""),
            "plaintext-readme": plaintextRM,
            "instruction": {
                "package": pkg["name"],
                "command": "npm install " + pkg["name"],
                "content": readme,
            },
            "popularity": compute_pkg_weight(pkg),
        },
    )
Beispiel #5
0
def add_to_db(pkg, es):
    """Store ``pkg`` in the ``'temp'`` index as an 'implementation' document.

    The document id comes from ``get_es_id(pkg)``.  The body records the
    language (``'JavaScript'``), the source (``'npm'``), an empty algorithm
    list, the package description, the text extracted from the readme, the
    install instruction (package name, ``npm install`` command and raw
    readme) and the popularity from ``compute_pkg_weight(pkg)``.

    Args:
        pkg: Mapping describing the package.  ``pkg['name']`` must exist;
            ``desc`` and ``readme`` are optional.
        es: Object exposing ``index(index=..., doc_type=..., id=...,
            body=...)`` (presumably an Elasticsearch client -- confirm
            against the caller).
    """
    readme = pkg.get('readme', '')
    # Treat every non-text value (None included) as a missing readme.
    # type(u'') resolves to `unicode` on Python 2 and `str` on Python 3,
    # which avoids naming the Python 2-only `unicode` builtin.
    text_types = (str, type(u''))
    if not isinstance(readme, text_types):
        readme = ''
    plaintextRM = extractText(readme)

    es.index(
        index='temp',
        doc_type='implementation',
        id=get_es_id(pkg),
        body={
            'language': 'JavaScript',
            'algorithm': [],
            'source': 'npm',
            'description': pkg.get('desc', ''),
            'plaintext-readme': plaintextRM,
            'instruction': {
                'package': pkg['name'],
                'command': 'npm install ' + pkg['name'],
                'content': readme,
            },
            'popularity': compute_pkg_weight(pkg),
        },
    )