コード例 #1
0
ファイル: sr_rss.py プロジェクト: constantAmateur/sciteit
def validate_link(url, whitelist=False):
    """Validate a submitted link URL.

    Sanitizes the URL, optionally enforces the domain whitelist, and
    rejects URLs that already have a non-deleted Link recorded in the
    LinksByUrl index.

    url: the raw URL string to validate (may be falsy).
    whitelist: when true, reject domains not in DOMAIN_WHITELIST.

    Returns the sanitized URL string when the link is acceptable,
    otherwise False.
    """
    # NOTE: the original body mixed hard tabs and spaces for indentation,
    # which is a TabError under Python 3; normalized to 4-space indents.
    if url:
        url = sanitize_url(url)
        if url:
            if whitelist and domain(url) not in DOMAIN_WHITELIST:
                print("Domain %s not in whitelist." % domain(url))
                return False
            try:
                lbu = LinksByUrl._byID(LinksByUrl._key_from_url(url))
            except tdb_cassandra.NotFound:
                # No existing link is indexed under this URL.
                return url
            link_id36s = lbu._values()
            links = Link._byID36(link_id36s, data=True, return_dict=False)
            links = [l for l in links if not l._deleted]
            if not links:
                # Every previously indexed link was deleted; URL is free.
                return url
            print("Link %s exists..." % url)
    return False
コード例 #2
0
ファイル: migrate.py プロジェクト: Shilohtd/reddit
def port_cassaurls(after_id=None, estimate=15231317):
    """Backfill the LinksByUrl index from every non-self Link.

    Walks all links (spam and non-spam alike) newest-first, optionally
    resuming after `after_id`, and batch-inserts a url-key -> {id36: id36}
    entry for each link that has a real outbound URL.

    after_id: id of a Link to resume after, or None to start fresh.
    estimate: expected total link count, used only for progress output.
    """
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    query = Link._query(Link.c._spam == (True, False),
                        sort=desc("_date"),
                        data=True)
    if after_id:
        query._after(Link._byID(after_id, data=True))

    links = progress(fetch_things2(query, chunk_size=500), estimate=estimate)

    def _is_outbound(link):
        # Self-posts carry no external URL; skip them either way
        # they are marked (missing url attribute or is_self flag).
        if getattr(link, "is_self", False):
            return False
        return getattr(link, "url", "self") != "self"

    for batch in in_chunks((l for l in links if _is_outbound(l)), 500):
        # Low write consistency is fine for a best-effort backfill.
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as writer:
            for link in batch:
                key = LinksByUrl._key_from_url(link.url)
                if key:
                    writer.insert(key, {link._id36: link._id36})
コード例 #3
0
ファイル: migrate.py プロジェクト: wqx081/reddit
def port_cassaurls(after_id=None, estimate=15231317):
    """Migrate Links into the LinksByUrl Cassandra index.

    Iterates every Link (spam and non-spam), newest first, skipping
    self-posts, and inserts a {id36: id36} entry keyed by the link's
    URL into LinksByUrl, 500 links per batch.

    after_id: id of a Link to resume after (skips everything newer),
        or None to start from the newest link.
    estimate: expected total link count; used only for progress output.
    """
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    # (True, False) matches both spam and non-spam links.
    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'),
                    data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    # Keep only links with a real outbound URL: drop self-posts,
    # whether marked by a missing url attribute or the is_self flag.
    q = (l for l in q if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        # CL.ONE: low write consistency is acceptable for a backfill.
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                # _key_from_url may return a falsy key for unusable URLs;
                # skip those rather than inserting an empty row key.
                if k:
                    b.insert(k, {l._id36: l._id36})