Beispiel #1
0
 def setUp(self):
     """Seed the database with one city and two organisations located in it."""
     shanghai = CityDict.objects.create(name='上海')
     # (name, click_nums, fav_nums) for each organisation fixture.
     fixture_rows = (
         ('zanneti', 2, 10),
         ('wangjiu', 10, 0),
     )
     Org.objects.bulk_create([
         Org(name=org_name, click_nums=clicks, fav_nums=favs, city=shanghai)
         for org_name, clicks, favs in fixture_rows
     ])
Beispiel #2
0
    def __call__(self, key, value):
        """Yield ``(org_id, document)`` pairs for one organisation record.

        ``key`` is the input line number and is not used. ``value`` is a
        tab-separated record with exactly three fields: the org id, a social
        id and the last status pull marker (for example ``10188``, ``PIH``,
        ``1274838358735``).

        Pages are requested through ``self.get_batch`` until it stops
        reporting a next page. Every post is yielded as
        ``(org_id, self.default_text(post))`` as soon as its page arrives.
        Links extracted from posts are accumulated and crawled only after
        paging has finished; each crawled document is then yielded as
        ``(org_id, doc)``.
        """
        org_id, social_id, last_status_pull = value.split('\t')
        # NOTE(review): the returned API handle is never used below; the call
        # is kept in case get_api() has side effects — confirm.
        api = self.get_api()

        org = Org.get(org_id)

        # NOTE(review): never read anywhere in this method.
        params = dict(limit=200)

        # Work queue of pages still to fetch, seeded with the first page.
        pending_pages = deque([1])
        collected_urls = []

        log.info("Creating pools")
        # URL crawlers, just use 20 greenlets for now
        crawler = SocialMediaCrawler(pool_size=20)

        while pending_pages:
            current_page = pending_pages.popleft()
            log.info("Got page %s" % current_page)

            batch = self.get_batch(
                org,
                limit=self.batch_size,
                page=current_page,
                since=last_status_pull,
            )
            posts, next_page, new_last_fetched = batch
            log.info("Got %s posts, next page %s" % (len(posts), next_page))

            # Persist the fetch marker before any post of this page is yielded.
            if new_last_fetched:
                setattr(org, self.last_updated_column, str(new_last_fetched))
                org.save()

            if next_page:
                pending_pages.append(next_page)

            for post in posts:
                if self.has_links(post):
                    log.info("Post has links")
                    # These URLs are crawled later; the fetched HTML is
                    # emitted alongside the post data.
                    collected_urls += self.extract_links(post)

                log.info("Yielding post data")
                yield (org_id, self.default_text(post))

        log.info("Getting %s urls" % len(collected_urls))
        for doc in crawler.crawl(collected_urls):
            yield (org_id, doc)
    def __call__(self, key, value):
        """Emit post texts and crawled link documents for a single org.

        Args:
            key: Line number of the input record (ignored).
            value: Three tab-separated fields: org id, social id and the
                last status pull marker.

        Yields:
            ``(org_id, text)`` for every post returned by ``self.get_batch``,
            followed by ``(org_id, doc)`` for every document the crawler
            returns for the links gathered from those posts.
        """
        fields = value.split('\t')
        # Unpacking enforces the three-field record shape.
        org_id, social_id, since = fields

        # NOTE(review): the result is unused; kept because the call itself may
        # matter (e.g. initialising a client) — confirm.
        api = self.get_api()
        org = Org.get(org_id)

        # NOTE(review): dead local, nothing reads it.
        params = dict(limit=200)

        first_page = 1
        page_queue = deque([first_page])
        link_urls = []

        log.info("Creating pools")
        # URL crawlers, just use 20 greenlets for now
        crawler = SocialMediaCrawler(pool_size=20)

        while page_queue:
            page_no = page_queue.popleft()
            log.info("Got page %s" % page_no)

            posts, following_page, fetched_marker = self.get_batch(
                org, limit=self.batch_size, page=page_no, since=since)
            log.info("Got %s posts, next page %s"
                     % (len(posts), following_page))

            if fetched_marker:
                # Record progress on the org under the configured column name.
                setattr(org, self.last_updated_column, str(fetched_marker))
                org.save()

            if following_page:
                page_queue.append(following_page)

            for item in posts:
                if self.has_links(item):
                    log.info("Post has links")
                    # Links are only collected here; crawling happens once
                    # all pages have been consumed.
                    found = self.extract_links(item)
                    link_urls.extend(found)

                log.info("Yielding post data")
                text = self.default_text(item)
                yield (org_id, text)

        log.info("Getting %s urls" % len(link_urls))
        for crawled in crawler.crawl(link_urls):
            yield (org_id, crawled)
Beispiel #4
0
def org_create(request):
    """Create an Org from POSTed form fields and make the requester follow it.

    Reads ``name``, ``vision_statement``, ``social_mission``, ``profit`` and
    ``admin`` from ``request.POST``; a missing key propagates as the lookup
    error raised by ``request.POST``. The three flag fields are treated as
    true only when their value is exactly ``'yes'``.

    Returns the JSON response built from the newly created org.
    """
    form = request.POST

    org = Org()
    org.name = form['name'].encode('utf-8')
    org.handle = create_handle(form['name'])
    org.vision_statement = form['vision_statement'].encode('utf-8')
    org.social_mission = form['social_mission'] == 'yes'
    org.profit_seeking = form['profit'] == 'yes'
    # The org is saved before the admins relation is touched.
    org.save()

    if form['admin'] == 'yes':
        org.admins.add(request.user)
        org.save()

    # Ensure a follow record exists for this user/org pair and switch it on.
    follow, _created = UserToOrgFollow.objects.get_or_create(
        user=request.user, org=org)
    follow.following = True
    follow.save()

    request.user.refresh_orgs_following()
    return json_response(json_encode(org))
Beispiel #5
0
    def create_test_users():
        henryOrg = Org(
            name="Habitat for Henry",
            street='3241 S Wabash Ave',
            city='Chicago',
            zipCode='60616',
            state='IL',
            country='USA',
            description="""
# HFH: Donate to me

I am a one-man org. Woohoo!

  - Markdown
  - Is
  - Cool
""",
        )

        henryOrg.save()

        henryUser = User.objects.create_user(
            username="******",
            email="*****@*****.**",
            password="******",
            org=henryOrg,
        )
        henryUser.save()

        henryProfile = Profile(
            user=henryUser,
            bio="I am Henry, the guy who made this cool site.",
            birth_date=datetime.strptime('Aug 1 1997', "%b %d %Y"),
        )
        henryProfile.save()

        henryItems: Items = Items.default_object()

        henryItems.apply_list({  # add some items
            'toilet paper': 3,
            'lettuce': 4,
            'bleach': 10,
        })

        henryItems.save()

        henryHome1 = Home(
            user=henryUser,
            name='Condo',
            street='6060 N Ridge Ave',
            city='Chicago',
            zipCode='60660',
            state='IL',
            country='USA',
            items=henryItems,
        )
        henryHome1.save()

        testuserorg = Org(
            name="Habitat for Test",
            street='1634 W Warren Blvd',
            city='Chicago',
            zipCode='60612',
            state='IL',
            country='USA',
            description="""
# Test User's Org

> I am a one-man org. Woohoo!

""",
        )
        testuserorg.save()

        testUser = User.objects.create_user(
            username="******",
            email="*****@*****.**",
            password="******",
            org=testuserorg,
        )
        testUser.save()

        testProfile = Profile(
            user=testUser,
            bio="I am a test user! Hi!",
            birth_date=datetime.now(),

        )
        testProfile.save()

        testitems: Items = Items.default_object()

        testitems.apply_list({  # add some items
            'lettuce': 4,
            'eggplant': 200,
        })

        testitems.save()


        testUserHome = Home(
            user=testUser,
            name='my test home',
            street='3530 S Wolcott Ave',
            city='Chicago',
            zipCode='60609',
            state='IL',
            country='USA',
            items=testitems,
        )
        testUserHome.save()
Beispiel #6
0
def org_create(request):
    """Build and save a new Org from the POST data, then follow it.

    The POST fields ``social_mission``, ``profit`` and ``admin`` are yes/no
    answers; only the exact string ``'yes'`` counts as affirmative. When
    ``admin`` is affirmative the requesting user is added to the org's
    admins. The requesting user is always marked as following the org.

    Returns the JSON response for the created org.
    """
    def answered_yes(field):
        # True only for the literal answer 'yes'.
        return request.POST[field] == 'yes'

    new_org = Org()
    new_org.name = request.POST['name'].encode('utf-8')
    new_org.handle = create_handle(request.POST['name'])
    new_org.vision_statement = request.POST['vision_statement'].encode('utf-8')
    new_org.social_mission = answered_yes('social_mission')
    new_org.profit_seeking = answered_yes('profit')
    new_org.save()

    if answered_yes('admin'):
        new_org.admins.add(request.user)
        new_org.save()

    follow_link, _ = UserToOrgFollow.objects.get_or_create(
        user=request.user,
        org=new_org,
    )
    follow_link.following = True
    follow_link.save()

    request.user.refresh_orgs_following()
    return json_response(json_encode(new_org))