import os

from bs4 import BeautifulSoup

# LEAGUE_ID (str) and UPDATE (bool) are assumed to be module-level settings;
# scrape() is the fetch-and-cache helper sketched below GetScores().


def GetScores():
    scores = {}
    # First 12 weeks
    for week_num in range(12):
        week = str(week_num + 1)
        url = ('https://fantasy.nfl.com/league/' + LEAGUE_ID +
               '/team/3/gamecenter?week=' + week)
        filename = (os.path.dirname(os.path.realpath(__file__)) +
                    '/data/scores' + week + '.txt')
        # Scrape fresh data, if desired
        if UPDATE:
            scrape(url, filename)
        with open(filename, 'r') as f:
            txt = f.read()
        html = BeautifulSoup(txt, 'html.parser')
        totals = html.find_all('span', {'class': 'teamTotal'})
        week_scores = {}
        for t in totals:
            # The team ID is the last character of the element's second CSS class
            team_id = t['class'][1][-1]
            week_scores[team_id] = float(t.text)
        scores[week] = week_scores
    return scores

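# scrape(url, filename) is not defined in this section. Below is a minimal
# sketch of the assumed fetch-and-cache behavior, using requests; note that
# fantasy.nfl.com league pages normally require an authenticated session,
# which this sketch omits.
import requests


def scrape(url, filename):
    # Hypothetical helper: fetch the page and cache the raw HTML to disk so
    # the Get* loaders can re-parse it without re-downloading.
    response = requests.get(url)
    response.raise_for_status()
    with open(filename, 'w') as f:
        f.write(response.text)
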
def convertUrlsToFeatures(urls):
    # Build one feature vector per URL using the preloaded model, vectorizers,
    # and lexicons held in module-level globals.
    features = []
    for url in urls:
        result = helpers.scrape(url)
        print(result[1])
        features.append(helpers.start(
            result[0], result[1], result[2], cat_dict, stem_dict, counts_dict,
            loaded_model, count_vect, tfidf_transformer, bias, assertives,
            factives, hedges, implicatives, report_verbs, positive_op,
            negative_op, wneg, wpos, wneu, sneg, spos, sneu))
    features = np.array(features)
    return features

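# A hypothetical call, assuming the module-level model and lexicon globals
# (loaded_model, cat_dict, and the rest) are already initialized; the URLs
# are placeholders.
urls = [
    'https://example.com/article-one',
    'https://example.com/article-two',
]
X = convertUrlsToFeatures(urls)
print(X.shape)  # one row of features per scraped URL
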
def GetRecords():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/records.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('td', {'class': 'teamWinPct'})
    records = {}
    for d in data:
        # Key on the team ID taken from the parent row's first CSS class
        records[d.parent['class'][0][-1]] = d.text
    return records

def GetTeams():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/teams.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('a', {'class': 'teamName'})
    teams = {}
    for d in data:
        teams[d['class'][1][-1]] = d.text
    return teams

def GetManagers():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID + '/owners'
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/managers.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('a', {'class': 'teamName'})
    managers = {}
    for d in data:
        # Walk up two levels from the team link, then take the text of the
        # next sibling element, which holds the manager's name
        manager = d.parent.parent.nextSibling.text
        managers[d['class'][1][-1]] = manager
    return managers

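# The three loaders key their results on the same single-character team ID,
# so they can be joined into a quick standings printout. A minimal usage
# sketch, assuming LEAGUE_ID and UPDATE are configured.
teams = GetTeams()
managers = GetManagers()
records = GetRecords()

for team_id, name in sorted(teams.items()):
    print('{} ({}): {}'.format(
        name, managers.get(team_id, '?'), records.get(team_id, '?')))
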
def setSnippetWeightedKeywords(snippetId):
    '''
    Find, weight, and save related bigrams and trigrams for the selected snippet

    Args:
        snippetId (int): Snippet ID

    Returns:
        String summary of the process
    '''
    snippet = getSnippetById(snippetId)
    print('Found snippet id [', snippetId, ']')
    searchQuery = snippet.get('searchQuery', snippet.get('title', ''))
    urls = getQueryUrls(searchQuery, snippet['language'])
    print('Found {} search results for [{}]'.format(len(urls), searchQuery))
    print(urls)
    # Skip PDFs; this module's scrape() takes a language and returns the
    # extracted article text as its first element
    articles = [scrape(url, snippet['language'])[0] for url in urls
                if not re.compile(r'\.pdf$', re.M | re.I).search(url)]
    print('Scraped {} articles'.format(len(articles)))
    if len(articles):
        # Build TF-IDF matrix over unigrams, bigrams, and trigrams
        stopWords = getStopWords(snippet['language'])
        vectorizer = TfidfVectorizer(
            max_df=0.6, min_df=0.2, ngram_range=(1, 3), lowercase=True,
            max_features=config.snippetsKeywordsBuilder['maxFeatures'],
            stop_words=stopWords)
        TfIdfMatrix = vectorizer.fit_transform(articles)
        inspectMatrix(TfIdfMatrix, vectorizer)
        topFeatures = getTopNFeatures(vectorizer, TfIdfMatrix)
        print('Top features are:', topFeatures)
        # topFeatures is ordered best-first, so topFeatures[0][1] is the max
        topFeaturesNormalized = [(feature[0], feature[1] / topFeatures[0][1])
                                 for feature in topFeatures]
        print('Top features normalized:', topFeaturesNormalized)
        weightedNGrams = getPhrases(topFeaturesNormalized, articles,
                                    snippet['language'])
        # print('N grams:', weightedNGrams)
        saveSnippetKeywords(snippet, weightedNGrams)
        return resultSummary(weightedNGrams, snippetId)
    else:
        return 'no relevant articles found'

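# getTopNFeatures() is not shown in this section. Given how its result is
# used above (an ordered list of (feature, score) pairs whose first entry
# holds the maximum score), it plausibly ranks terms by aggregate TF-IDF
# weight; the sketch below is written under that assumption and is not the
# project's actual implementation.
import numpy as np


def getTopNFeatures(vectorizer, tfIdfMatrix, n=20):
    # Assumed behavior: sum each term's TF-IDF weight over all articles and
    # return the n highest-scoring (term, score) pairs, best first.
    totals = np.asarray(tfIdfMatrix.sum(axis=0)).ravel()
    terms = vectorizer.get_feature_names_out()
    ranked = sorted(zip(terms, totals), key=lambda p: p[1], reverse=True)
    return ranked[:n]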