예제 #1
0
def build_vectors(articles, weights):
    """
    Build weighted vector representations for a list of articles.

    Three feature groups are computed per article, each group is passed
    through ``normalize`` on its own, scaled by its weight, and the groups
    are then concatenated column-wise in this order:

        1. publication value (``a.published``, a single column)
        2. bag-of-words vector (``vectorize(a.text)``)
        3. concept vector (``concept_vectorize`` over the concept slugs)

    Args:
        articles: iterable of objects exposing ``published``, ``text`` and
            ``concepts`` (each concept exposing ``slug``).
        weights: indexable of at least three numbers,
            ``[pub, bow, concept]``.

    Returns:
        A dense 2-D array with one row per article.
    """
    pub_vecs, bow_vecs, con_vecs = [], [], []
    for a in articles:
        pub_vecs.append(np.array([a.published]))
        bow_vecs.append(vectorize(a.text))
        con_vecs.append(concept_vectorize([c.slug for c in a.concepts]))

    pub_vecs = normalize(csr_matrix(pub_vecs), copy=False)
    bow_vecs = normalize(csr_matrix(bow_vecs), copy=False)
    con_vecs = normalize(csr_matrix(con_vecs), copy=False)

    # Weight each group *before* merging. The previous implementation merged
    # first and then scaled hard-coded column slices (1:101 / 101:), which
    # only lines up with the groups when the bag-of-words vector is exactly
    # 100 columns wide. Scaling per group is correct for any width.
    vecs = hstack([
        pub_vecs * weights[0],
        bow_vecs * weights[1],
        con_vecs * weights[2],
    ])

    return vecs.toarray()
예제 #2
0
            'image': 'http://www.argos.la/image.jpg',
            'name': name
        }
    return None

def faux_uri_for_name(name):
    """Return a fake resource URI on fauxpedia.org for ``name``."""
    template = "http://fauxpedia.org/resource/{0}"
    return template.format(name)

def faux_commonness_for_name(name):
    """Return a fixed commonness score of 100; ``name`` is ignored."""
    commonness = 100
    return commonness

def faux_commonness_for_uri(name):
    """Return a fixed commonness score of 100.

    The single argument is called ``name`` (not ``uri``) and is ignored.
    """
    commonness = 100
    return commonness

def faux_concepts(docs):
    """Return a fixed list of two concept names; ``docs`` is ignored.

    A new list object is created on every call.
    """
    concepts = ['Nautilus', 'Picard']
    return concepts

def faux_summarize(title, text):
    """Return a fixed fake summary as a list of words.

    Both ``title`` and ``text`` are ignored. A new list is created on
    every call.
    """
    summary = ['this', 'is', 'a', 'fake', 'summary']
    return summary

def faux_multisummarize(docs):
    """Return a fixed fake summary as a list of words; ``docs`` is ignored.

    A new list is created on every call.
    """
    summary = ['this', 'is', 'a', 'fake', 'summary']
    return summary

from galaxy import vectorize
# Computed once, when this module is imported, by calling the real
# ``vectorize`` on a fixed string; every faux_vectorize call reuses it.
cached_vector = vectorize('foo bar')
def faux_vectorize(docs):
    """Return the module-level ``cached_vector``; ``docs`` is ignored.

    The same object is returned on every call (no copy is made).
    """
    return cached_vector

def faux_save_from_url(url, filename):
    """Return a fixed fake image URL; ``url`` and ``filename`` are ignored."""
    saved_location = 'https://s3.amazon.com/fakeimage.jpg'
    return saved_location
예제 #3
0
파일: models.py 프로젝트: frnsys/galaxy
 def vectors(self):
     """Return the result of ``vectorize`` applied to ``self.text``."""
     text = self.text
     return vectorize(text)