Example #1
def init_process(row):
    uid = str(uuid.uuid3(uuid.NAMESPACE_OID, "Process/" + row[0]))
    cat_id = get_category_id("PROCESS", row[4], row[3])
    flow_id = str(uuid.uuid3(uuid.NAMESPACE_OID, "Flow/" + row[0]))
    p = {
        "@context": "http://greendelta.github.io/olca-schema/context.jsonld",
        "@type": "Process",
        "@id": uid,
        "name": row[2],
        "processTyp": "UNIT_PROCESS",
        "category": {"@type": "Category", "@id": cat_id},
        "processDocumentation": {"copyright": False},
        "exchanges": [
            {
                "@type": "Exchange",
                "avoidedProduct": False,
                "input": False,
                "amount": 1.0,
                "flow": {"@type": "Flow", "@id": flow_id},
                "unit": {
                    "@type": "Unit",
                    "@id": "3f90ee51-c78b-4b15-a693-e7f320c1e894"
                },
                "flowProperty": {
                    "@type": "FlowProperty",
                    "@id": "b0682037-e878-4be4-a63a-a7a81053a691"
                },
                "quantitativeReference": True
            }
        ]
    }
    return p
Example #2
def setup_app(command, conf, vars):
    """Place any commands to setup repository here"""
    # Don't reload the app if it was loaded under the testing environment
    if not pylons.test.pylonsapp:
        load_environment(conf.global_conf, conf.local_conf)

    # Create the tables if they don't already exist
    Base.metadata.create_all(bind=Session.bind)

    namespace = uuid.UUID(conf.global_conf['uuid_namespace'])

    # Default groups
    users = model.Group(name='users')
    users.uuid = uuid.uuid3(namespace, 'GROUP'+'users').hex
    Session.add(users)
    Session.commit()

    # add some users from a file into the db for testing
    # each line of the file should be of the form name,email,dn
    admin_file = conf.global_conf['admin_file']
    f = open(path.expandvars(admin_file), 'r')
    for line in f:
        name, email, dn = line.rstrip('\n').split(',')
        user = model.User(name=name, email=email, client_dn=dn)
        user.uuid = uuid.uuid3(namespace, dn).hex
        user.global_admin = True
        user.suspended = False
        user.groups.append(users)
        Session.add(user)
        Session.commit()
    f.close()
Example #3
def migrate_standardpage_intro_and_body_to_streamfield(apps, schema_editor):
    StandardPage = apps.get_model('torchbox.StandardPage')
    stream_block = StandardPage._meta.get_field('streamfield').stream_block

    # Prepend body to the streamfield
    for page in StandardPage.objects.exclude(body__in=['', '<p></p>', '<p><br/></p>']):
        # Add body as first block so it appears in the same place on the template
        page.streamfield = StreamValue(
            stream_block,
            [
                ('paragraph', RichText(page.body), str(uuid3(UUID_NAMESPACE, page.body))),
            ] + [
                (child.block_type, child.value, child.id)
                for child in page.streamfield
            ]
        )

        page.save()

    # Prepend intro to the streamfield
    for page in StandardPage.objects.exclude(intro__in=['', '<p></p>', '<p><br/></p>']):
        # Add intro as first block so it appears in the same place on the template
        page.streamfield = StreamValue(
            stream_block,
            [
                ('paragraph', RichText(page.intro), str(uuid3(UUID_NAMESPACE, page.intro))),
            ] + [
                (child.block_type, child.value, child.id)
                for child in page.streamfield
            ]
        )

        page.save()
Example #4
    def initialize(self):
        root  = {"role_name": "root"}
        admin = {"username": "******",
                 "password": "******",
                 "status": 1,
                 "email": "*****@*****.**",
                 "role_code": str(uuid.uuid3(uuid.NAMESPACE_DNS, "root"))}

        roles = AlchemyWrapper("roles")
        users = AlchemyWrapper("users")
        node  = AlchemyWrapper("resource")
        if len(roles.all(**root))==0:
            roles.insert(root)
        if len(users.all(username="******"))==0:
            users.insert(admin)

        attribute = getattr(options, "attribute", "scarecrow")
        for api in self.api_list.get("api"):
            combinat  = api.get("url") + attribute
            node_code = str(uuid.uuid3(uuid.NAMESPACE_DNS, str(combinat)))
            node_info = {"attribute": attribute,
                         "code": node_code,
                         "resource_name": api.get("name"),
                         "resource_URI": api.get("url")}
            if len(node.all(**node_info))==0:
                node.insert(node_info)
Example #5
 def parse(self,response):
     global uuid
     # Generate the UUID namespace, tied to the current URL
     namespace = uuid.uuid3(uuid.NAMESPACE_URL,response.url)
     
     item = Baike_Qijia_Item()
     split1 = '>'
     split2 = ';'
     item['title_id'] = response.url.split("-")[1][:-1]  # take the id from the URL
     
     item['title_url'] = response.url
     item['title_name'] = Selector(response).xpath("//div[@class='artical-des atical-des1 fl']/h1/text()").extract()
     item['title_introduction'] = Selector(response).xpath("//div[@class='artical-des atical-des1 fl']/div[1]/i/text()").extract()
     # join the categories into a single string with '>'
     category = Selector(response).xpath("//div[@class='bk-nav clearfix']/a/text()").extract()
     item['title_category'] = split1.join(category)
     item['content_name'] = Selector(response).xpath("//div[@class='atical-floor']/div/h2/a/text()").extract()
     # generate content primary keys and the uuid_list for the title table
     content_uuid = []
     index = 1
     while index <= len(item['content_name']):
         con_id = uuid.uuid3(namespace,'%d'%index)
         content_uuid.append(con_id.hex)
         index = index + 1
     item['content_uuid'] = content_uuid
     item['content_uuid_list'] = split2.join(content_uuid)
     # content_text: the HTML tags inside the text still need to be handled
     item['content_text'] = Selector(response).xpath("//div[@class='atical-floor']/div/div/div/p").extract()
     
     item['image_urls'] = Selector(response).xpath("//div[@class='floor-content floor-content-ml clearfix']/div/img/@src").extract()
     
     return item
     
Example #6
def update_revisions(page, content):
    streamfield_json = content.get('streamfield', '')

    if streamfield_json:
        streamfield = json.loads(streamfield_json)
    else:
        streamfield = []

    # Prepend body to the streamfield
    if content['body'] not in ['', '<p></p>', '<p><br/></p>']:
        content['old_body'] = content['body']

        streamfield.insert(0, {
            "type": "paragraph",
            "value": content['body'],
            "id": str(uuid3(UUID_NAMESPACE, content['body'])),
        })

    # Prepend intro to the streamfield
    if content['intro'] not in ['', '<p></p>', '<p><br/></p>']:
        streamfield.insert(0, {
            "type": "paragraph",
            "value": content['intro'],
            "id": str(uuid3(UUID_NAMESPACE, content['intro'])),
        })

    # Save streamfield content with "body" key, as it was renamed as well in this migration
    content['body'] = json.dumps(streamfield)

    return content
Example #7
def create_mock_resource_template():
    ### Resource to be requested for 'mock'
    resource_requests = {'compute': {}, 'network': {}}

    ###### mycompute-0
    msg = rmgryang.VDUEventData_RequestInfo()
    msg.image_id  = str(uuid.uuid3(uuid.NAMESPACE_DNS, 'image-0'))
    msg.vm_flavor.vcpu_count = 4
    msg.vm_flavor.memory_mb = 8192
    msg.vm_flavor.storage_gb = 40
    resource_requests['compute']['mycompute-0'] = msg

    ###### mycompute-1
    msg = rmgryang.VDUEventData_RequestInfo()
    msg.image_id  = str(uuid.uuid3(uuid.NAMESPACE_DNS, 'image-1'))
    msg.vm_flavor.vcpu_count = 2
    msg.vm_flavor.memory_mb = 8192
    msg.vm_flavor.storage_gb = 20
    resource_requests['compute']['mycompute-1'] = msg

    ####### mynet-0
    msg = rmgryang.VirtualLinkEventData_RequestInfo()
    resource_requests['network']['mynet-0'] = msg
    
    ####### mynet-1
    msg = rmgryang.VirtualLinkEventData_RequestInfo()
    resource_requests['network']['mynet-1'] = msg

    return resource_requests
Example #8
def mock_data(fields):
    result = {}
    for f in fields:
        fname = f["name"]
        fval = f.get("default", NotImplemented)
        if fval is not NotImplemented:
            result[fname] = fval
            continue

        ftype = f.get("type", "string")
        f_id = abs(id(f))
        if ftype == "string":
            result[fname] = uuid.uuid3(uuid.NAMESPACE_OID, str(f_id)).hex[:8]
        elif ftype == "integer":
            result[fname] = f_id % 100
        elif ftype == "float":
            result[fname] = f_id % 100 / 1.0
        elif ftype == "uuid":
            result[fname] = uuid.uuid3(uuid.NAMESPACE_OID, str(f_id)).hex
        elif ftype == "date":
            result[fname] = datetime.date.today().isoformat()
        elif ftype == "datetime":
            result[fname] = datetime.datetime.today().isoformat()
        elif ftype == "boolean":
            result[fname] = [True, False][f_id % 2]
        elif ftype.endswith("list"):
            result[fname] = []
    return result
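A hedged usage sketch for the generator above; the field dictionaries only need the "name"/"type"/"default" keys the function reads, and the schema shown here is illustrative, not from the original project:

fields = [
    {"name": "id", "type": "uuid"},
    {"name": "count", "type": "integer"},
    {"name": "title", "type": "string", "default": "untitled"},
]
row = mock_data(fields)
# row["title"] == "untitled" (the default wins),
# row["id"] is a 32-character hex digest derived from id(field_dict),
# row["count"] is an int in 0..99.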
Example #9
    def _memoize_make_version_hash(self):
        if self.namespace.startswith('http'):
            UUID = uuid.uuid3(uuid.NAMESPACE_URL, self.namespace)
        elif self.namespace:
            UUID = uuid.uuid3(uuid.NAMESPACE_DNS, self.namespace)
        else:
            UUID = uuid.uuid4()

        return base64.b64encode(UUID.bytes)[:6].decode(ENCODING)
Example #10
 def _set_identifiers(self):
     self.identifiers = []
     entry_uuid = None
     if self.isbn is not None:
         entry_uuid = uuid.uuid3(self.uuid_master, isbn)
         self.identifiers.append('urn:isbn:%s' % isbn)
     else:
         entry_uuid = uuid.uuid3(self.uuid_master, ''.join(self.authors) + self.title)
     self.urn = 'urn:uuid:%s' % entry_uuid
Example #11
    def get_data(self, index=None):
        if index is None:
            index = self.mutation_index

        valuesize = self.valuesize_sequence[index % len(self.valuesize_sequence)]
        if self.cache_data:
            if valuesize not in self.data_cache:
                self.data_cache[valuesize] = (str(uuid.uuid3(self.uuid, str(index))) * (1 + valuesize // 36))[:valuesize]
            return str(index) + self.data_cache[valuesize]
        else:
            return (str(uuid.uuid3(self.uuid, str(index))) * (1 + valuesize // 36))[:valuesize]
Example #12
    def save(self, commit=True):
        # Save the provided password in hashed format
        user = super(UserCreationForm, self).save(commit=False)
        user.set_password(self.cleaned_data["password1"])
        user.userid = str(uuid.uuid3(uuid.uuid4(), str(time.time())).hex)
        user.usignature = str(uuid.uuid3(uuid.uuid4(), str(time.time())).hex)
        user.clientid = str(uuid.uuid3(uuid.uuid4(), str(time.time())).hex)

        if commit:
            user.save()
        return user
Example #13
def get_uuid_code():
    """
    Generate random codes with the uuid module.
    :return: random code
    """
    print(uuid.uuid3(uuid.NAMESPACE_DNS, 'practice_0001.py'))  # based on an MD5 hash
    print(uuid.uuid4())  # random UUID
    print(uuid.uuid5(uuid.NAMESPACE_DNS, 'practice_0001.py'))  # based on a SHA-1 hash
    for ui in range(10):
        print(uuid.uuid3(uuid.NAMESPACE_DNS, '{}'.format(ui)))
    return uuid.uuid1()
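As the comments above note, uuid3 is MD5-based and uuid5 is SHA-1-based; both are deterministic for a fixed namespace and name, while uuid4 is random. A minimal sketch of that contrast (the name string is arbitrary):

import uuid

a = uuid.uuid3(uuid.NAMESPACE_DNS, 'practice_0001.py')
b = uuid.uuid3(uuid.NAMESPACE_DNS, 'practice_0001.py')
assert a == b and a.version == 3      # same inputs -> same version-3 UUID

c = uuid.uuid5(uuid.NAMESPACE_DNS, 'practice_0001.py')
assert c != a and c.version == 5      # different hash, different version field

assert uuid.uuid4() != uuid.uuid4()   # uuid4 draws fresh random bits on each call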
Example #14
def tc_today(n=1):
    """
        generate n table codes for today
    """
    for i in range(0,n):
        tcode = str(uuid.uuid3(uuid.uuid1(), 'digital menu'))[:4]
        if tcode == 'dba5':
            tcode = str(uuid.uuid3(uuid.uuid1(), 'digital menu'))[:4]
        # insert this table code to use
        tc = TableCode(code=tcode, date=date.today())
        tc.save()
Example #15
    def get_json_data(self, index=None):
        if index is None:
            index = self.mutation_index

        valuesize = self.valuesize_sequence[index % len(self.valuesize_sequence)]
        if self.cache_data:
            if valuesize not in self.data_cache:
                self.data_cache[valuesize] = (str(uuid3(self.uuid, str(index))) * (1 + valuesize // 36))[:valuesize]
            return json.dumps({'index': index, 'data': self.data_cache[valuesize], 'size': valuesize})
        else:
            return json.dumps({'index': index, 'data': (str(uuid3(self.uuid, str(index))) * (1 + valuesize // 36))[:valuesize], 'size': valuesize})
Example #16
    def test_uuid3(self):
        equal = self.assertEqual

        # Test some known version-3 UUIDs.
        for u, v in [
            (uuid.uuid3(uuid.NAMESPACE_DNS, "python.org"), "6fa459ea-ee8a-3ca4-894e-db77e160355e"),
            (uuid.uuid3(uuid.NAMESPACE_URL, "http://python.org/"), "9fe8e8c4-aaa8-32a9-a55c-4535a88b748d"),
            (uuid.uuid3(uuid.NAMESPACE_OID, "1.3.6.1"), "dd1a1cef-13d5-368a-ad82-eca71acd4cd1"),
            (uuid.uuid3(uuid.NAMESPACE_X500, "c=ca"), "658d3002-db6b-3040-a1d1-8ddd7d189a4d"),
        ]:
            equal(u.variant, uuid.RFC_4122)
            equal(u.version, 3)
            equal(u, uuid.UUID(v))
            equal(str(u), v)
Example #17
    def generateUuid(self, email_id, machine_name):
        """ return a uuid which uniquely identifies machinename and email id """
        uuidstr = None

        if machine_name not in self.d:
            myNamespace = uuid.uuid3(uuid.NAMESPACE_URL, machine_name)
            uuidstr = str(uuid.uuid3(myNamespace, email_id)) 

            self.d[machine_name] = (machine_name, uuidstr, email_id)
            self.d[uuidstr] = (machine_name, uuidstr ,email_id)
        else:
            (machine_name, uuidstr, email_id) = self.d[machine_name]

        return uuidstr
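The pattern in this example, deriving a namespace UUID from one attribute and then hashing the second attribute inside it, can be reduced to a few lines; the function and argument names below are illustrative, not from the original class:

import uuid

def machine_email_uuid(machine_name, email_id):
    # Per-machine namespace, then a stable UUID for the email within that namespace.
    machine_ns = uuid.uuid3(uuid.NAMESPACE_URL, machine_name)
    return str(uuid.uuid3(machine_ns, email_id))

assert machine_email_uuid('host-a', 'a@example.com') == machine_email_uuid('host-a', 'a@example.com')
assert machine_email_uuid('host-a', 'a@example.com') != machine_email_uuid('host-b', 'a@example.com')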
Example #18
  def generate_id(self, email, password):
    host_id = None

    if email not in self.db:
      name = uuid.uuid3(uuid.NAMESPACE_URL, email)
      host_id = str(uuid.uuid3(name, email)) 

      self.db[email] = (email, password, host_id)
      self.db[host_id] = (email, password, host_id)

    else:
      (email, password, host_id) = self.db[email]

    return host_id
Example #19
 def _key_to_id(self, key):
     """
     Converts Ecospold01 "number" attributes to UUIDs using the internal UUID namespace.
     :param key:
     :return:
     """
     if isinstance(key, int):
         key = str(key)
     u = to_uuid(key)
     if u is not None:
         return u
     if six.PY2:
         return uuid.uuid3(self._ns_uuid, key.encode('utf-8'))
     else:
         return uuid.uuid3(self._ns_uuid, key)
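A standalone sketch of the same idea, assuming the archive's internal namespace (self._ns_uuid above) is itself a UUID; the names here are illustrative:

import uuid

ns_uuid = uuid.uuid4()  # stand-in for the archive's internal namespace UUID

def key_to_id(key, ns=ns_uuid):
    # Map an Ecospold01 "number" attribute to a stable UUID inside the namespace.
    return uuid.uuid3(ns, str(key))

assert key_to_id(42) == key_to_id('42')  # an int and its string form map to the same id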
Example #20
def tc_days(n=30,per_day=10):
    # generate table codes per day
    for i in range(0,n):
        valid_day = date.today()+timedelta(i)
        tc = TableCode(code='dba5', date=valid_day)
        tc.save()
        for j in range(0,per_day):
            # 10 table codes per day
            tcode = str(uuid.uuid3(uuid.uuid1(), 'digital menu'))[:4]
            if tcode == 'dba5':
                # regenerate since it collides with default
                tcode = str(uuid.uuid3(uuid.uuid1(), 'digital menu'))[:4]
            # insert this table code to use
            tc = TableCode(code=tcode, date=valid_day)
            tc.save()
Example #21
def DoubanMoviePic(header, body, SendMsgFunc):
    soup = BeautifulSoup(body)
    nodelist = soup('div', attrs={"class" : "mod"})
    for node in nodelist:
        pictype = None
        if node.h2:
            if node.h2.text.startswith(u'海报'):  # "poster" section
                pictype = '1'
                rawpictype = '2'
            elif node.h2.text.startswith(u'剧照'):  # "movie still" section
                pictype = '3'
                rawpictype = '4'

        if pictype:
            piclist = node('img', attrs={"src" : RE_PIC_SRC})
            for pic in piclist:
                row = {}
                picid = RE_PIC_SRC.search(pic['src']).group('picid')

                row['CONTENT_IMG_ID'] = str(uuid.uuid3(uuid.NAMESPACE_DNS, ('Douban.rawpic.%s' % picid).encode('utf8')))
                row['CONTENT_ID'] = header['ContentId']
                row['SOURCE_URL'] = header['Url']
                row['IMG_URL'] = pic['src'].replace('/albumicon/', '/raw/')
                row['IMG_NAME'] = None
                row['IMG_TYPE'] = rawpictype
                Insert('CONTENT_IMG', row, SendMsgFunc)
Example #22
def _remote_fetch(env, url, out_file=None, allow_fail=False):
    """Retrieve url using wget, performing download in a temporary directory.

    Provides a central location to handle retrieval issues and avoid
    using interrupted downloads.
    """
    if out_file is None:
        out_file = os.path.basename(url)
    if not env.safe_exists(out_file):
        orig_dir = env.safe_run_output("pwd").strip()
        temp_ext = "/%s" % uuid.uuid3(uuid.NAMESPACE_URL,
                                      str("file://%s/%s/%s/%s" %
                                          (env.host, socket.gethostname(),
                                           datetime.datetime.now().isoformat(), out_file)))
        with _make_tmp_dir(ext=temp_ext) as tmp_dir:
            with cd(tmp_dir):
                with warn_only():
                    result = env.safe_run("wget --no-check-certificate -O %s '%s'" % (out_file, url))
                if result.succeeded:
                    env.safe_run("mv %s %s" % (out_file, orig_dir))
                elif allow_fail:
                    out_file = None
                else:
                    raise IOError("Failure to retrieve remote file")
    return out_file
Example #23
 def test_daily_stats(self):
     """
     checking if there are the statistics of the day
     """
     client = Client()
     client.login(username='******', password='******')
     resources =  resourceInfoType_model.objects.all()
     for resource in resources:
         resource.storage_object.publication_status = INGESTED
         resource.storage_object.save()
         client.post(ADMINROOT, \
             {"action": "publish_action", ACTION_CHECKBOX_NAME: resource.id}, \
             follow=True)
         
     # get stats days date 
     response = client.get('/{0}stats/days'.format(DJANGO_BASE))
     self.assertEquals(200, response.status_code)
     # get stats info of the node 
     response = client.get('/{0}stats/get'.format(DJANGO_BASE))
     self.assertEquals(200, response.status_code)
     self.assertContains(response, "lrcount")
     self.assertNotContains(response, "usagestats")
     # get full stats info of the node 
     response = client.get('/{0}stats/get/?statsid={1}'.format(DJANGO_BASE, str(uuid.uuid3(uuid.NAMESPACE_DNS, STORAGE_PATH))))
     self.assertEquals(200, response.status_code)
     self.assertContains(response, "usagestats")
Example #24
    def insert(self, metadata):
        instance = self.model()
        try:
            if 'code' not in metadata and hasattr(instance, 'code'):
                if self.tablename in ("roles", "users"):
                    name = metadata.get("role_name") if "role_name" in metadata else metadata.get("username")
                    setattr(instance, 'code', str(uuid.uuid3(uuid.NAMESPACE_DNS, str(name))))
                else:
                    setattr(instance, 'code', str(uuid.uuid4()))

            for key, value in metadata.items():
                if hasattr(instance, key):
                    setattr(instance, key, value)
            self.session.add(instance)

            # Flush
            self.session.flush()
            # To Dict
            result = self.to_dict(instance.__dict__)
            # Commit
            self.session.commit()
            result["errorcode"] = 1
        except:
            self.logging_error()
            # print(traceback.format_exc())
            self.session.rollback()
            result = {'errorcode':0}
        return result
Example #25
def make_ical(data, sources):
    calweek_regex = re.compile(r'^(\d+)\. KW$')
    time_regex = re.compile(r'^(\d+)\.(\d+) - (\d+)\.(\d+)$')
    room_regex = re.compile(r'^(.*) - (.*)$')

    times = {}
    for time in data[0]['order']:
        matches = time_regex.match(time)
        if not matches:
            raise CannotParseTime("String was: %s" % time)
        newtime = {'start': rd.relativedelta(hour=int(matches.group(1)), minute=int(matches.group(2))),
                   'end': rd.relativedelta(hour=int(matches.group(3)), minute=int(matches.group(4)))}
        times[time] = newtime

    calendar = vobject.iCalendar()

    cat_map = {u"V": u"Vorlesung",
               u"Ü": u"Übung",
               u"P": u"Praktikum"}

    begin_date = None
    for week in data:
        if not begin_date:
            calweek = calweek_regex.match(week['week'])
            if not calweek:
                raise CannotParseCalweek("String was: %s" % week['week'])
            calweek = int(calweek.group(1))
            begin_date = datetime.now() + rd.relativedelta(month=1, day=4, weekday=rd.MO(-1), weeks=+(calweek - 1), hour=0, minute=0, second=0, microsecond=0)
        else:
            begin_date = begin_date + rd.relativedelta(weeks=+1)

        for day in range(0,5):
            day_data = week['data'][day]
            day_date = begin_date + rd.relativedelta(days=+day)
            for time in day_data:
                for entry in day_data[time]:
                    event = calendar.add('vevent')
                    event.add('dtstart').value = day_date + times[time]["start"]
                    event.add('dtend').value = day_date + times[time]["end"]
                    cat = ""
                    if entry["typ"][0] in cat_map:
                        event.add('categories').value = ["UNI:" + cat_map[entry["typ"][0]]]
                        cat = " (%s)" % cat_map[entry["typ"][0]]

                    teacher = entry["room"]
                    room_match = room_regex.match(entry["room"])
                    if room_match:
                        event.add('location').value = room_match.group(1).strip()
                        teacher = room_match.group(2)

                    event.add('summary').value = "%s%s" % (entry['name'], cat)
                    event.add('description').value = u"Kürzel: %s\nDozent: %s\nVeranstaltungstyp: %s\nQuelle: %s" % (entry["short"],
                                                                     teacher,
                                                                     entry["typ"],
                                                                     sources[entry['source']].string)
                    uid = uuid.uuid3(uuid.NAMESPACE_DNS, '%s %s' % (str(event.location.value),
                                                                    str(event.dtstart.value)))
                    event.add("uid").value = str(uid)

    return calendar.serialize()
Example #26
 def get_links(self):
     """get all the news links in the page
     """
     soup = BeautifulSoup(self.page)
     vote = 0
     infos = []
     links = []
     for link in soup.find_all('a'):
         l = link['href']
         if l.startswith('vote'):
             vote = 1
         elif vote == 1:
             if l.startswith("item"):
                 l = "%s/%s" % (self.surl, l)
             infos = [Markup.escape(link.string),
                      Markup.escape(l.strip()),
                      date_internet(datetime.now())]
             time.sleep(1)
             vote = 2
         elif l.startswith('item') and vote == 2:
             infos.append("%s/%s" % (self.surl, l))
             infos.append(uuid3(NAMESPACE_DNS, infos[1]))
             links.append(infos)
             vote = 0
     return links
Example #27
def track_call(api_action, api_label, x_tba_app_id):
    """
    For more information about GAnalytics Protocol Parameters, visit
    https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
    """
    analytics_id = Sitevar.get_by_id("google_analytics.id")
    if analytics_id is None:
        logging.warning("Missing sitevar: google_analytics.id. Can't track API usage.")
    else:
        GOOGLE_ANALYTICS_ID = analytics_id.contents['GOOGLE_ANALYTICS_ID']
        params = urllib.urlencode({
            'v': 1,
            'tid': GOOGLE_ANALYTICS_ID,
            'cid': uuid.uuid3(uuid.NAMESPACE_X500, str(x_tba_app_id)),
            't': 'event',
            'ec': 'api-v02',
            'ea': api_action,
            'el': api_label,
            'cd1': x_tba_app_id,  # custom dimension 1
            'ni': 1,
            'sc': 'end',  # forces tracking session to end
        })

        analytics_url = 'http://www.google-analytics.com/collect?%s' % params
        urlfetch.fetch(
            url=analytics_url,
            method=urlfetch.GET,
            deadline=10,
        )
Example #28
def track_call(api_action, api_details, x_tba_app_id):
    analytics_id = Sitevar.get_by_id("google_analytics.id")
    if analytics_id is None:
        logging.warning("Missing sitevar: google_analytics.id. Can't track API usage.")
    else:
        GOOGLE_ANALYTICS_ID = analytics_id.contents['GOOGLE_ANALYTICS_ID']
        params = urllib.urlencode({
            'v': 1,
            'tid': GOOGLE_ANALYTICS_ID,
            'cid': uuid.uuid3(uuid.NAMESPACE_X500, str(x_tba_app_id)),
            't': 'event',
            'ec': 'api',
            'ea': api_action,
            'el': api_details,
            'cd1': x_tba_app_id,  # custom dimension 1
            'ni': 1,
            'sc': 'end',  # forces tracking session to end
        })

        # Sets up the call
        analytics_url = 'http://www.google-analytics.com/collect?%s' % params
        urlfetch.fetch(
            url=analytics_url,
            method=urlfetch.GET,
            deadline=10,
        )
Example #29
    def track_notification(self, notification_type_enum, num_keys):
        """
        For more information about GAnalytics Protocol Parameters, visit
        https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
        """
        analytics_id = Sitevar.get_by_id("google_analytics.id")
        if analytics_id is None:
            logging.warning("Missing sitevar: google_analytics.id. Can't track API usage.")
        else:
            GOOGLE_ANALYTICS_ID = analytics_id.contents['GOOGLE_ANALYTICS_ID']
            params = urllib.urlencode({
                'v': 1,
                'tid': GOOGLE_ANALYTICS_ID,
                'cid': uuid.uuid3(uuid.NAMESPACE_X500, str('tba-notification-tracking')),
                't': 'event',
                'ec': 'notification',
                'ea': NotificationType.type_names[notification_type_enum],
                'ev': num_keys,
                'ni': 1,
                'sc': 'end',  # forces tracking session to end
            })

            analytics_url = 'http://www.google-analytics.com/collect?%s' % params
            urlfetch.fetch(
                url=analytics_url,
                method=urlfetch.GET,
                deadline=10,
            )
Example #30
 def createUser(self):
     """Create a new user"""
     uid = str(uuid.uuid3(uuid.NAMESPACE_DNS, self.username + str(time.time() )))
     print uid
     self.cur.execute("INSERT INTO users VALUES(?,?,?)", (self.username, self.password, uid))
     self.con.commit()
     return True
Example #31
class MySpider(spider.Spider):
    def __init__(self,
                 proxy_enable=False,
                 proxy_max_num=setting.PROXY_MAX_NUM,
                 timeout=setting.HTTP_TIMEOUT,
                 cmd_args=None):
        spider.Spider.__init__(self,
                               proxy_enable,
                               proxy_max_num,
                               timeout=timeout,
                               cmd_args=cmd_args)

        # Site name
        self.siteName = "海南省公共资源交易中心"  # Hainan Provincial Public Resource Trading Center
        # Category code: 01 news, 02 forum, 03 blog, 04 weibo, 05 print media, 06 WeChat, 07 video, 99 search engine
        self.info_flag = "99"

        # List of entry URLs
        # self.start_urls = ["http://www.bidcenter.com.cn/viplist-1.html"]
        self.start_urls = ["http://zw.hainan.gov.cn"]
        self.encoding = 'utf-8'
        self.site_domain = 'hainan.gov.cn'
        self.dedup_uri = None
        self.headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate, br",
            "Accept-Language":
            "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            "Connection":
            "keep-alive",
            # "Cookie": "_gscu_1811078948=41663282vx6ent53; ASP.NET_SessionId=x01pkg455lrliaiky4l3av45; _gscbrs_1811078948=1; _gscs_1811078948=t42005267lfkkmo53|pv:3; cookies=89314150",
            # bidguid=96984f59-dcbf-491f-9bab-a8500ea4f12d; UM_distinctid=16492ad9f0ba4-0c0a2c42949499-444a002e-1fa400-16492ad9f0c2aa; _uab_collina=153146928107553947905014; _umdata=BA335E4DD2FD504F1EDA57F02CFE1964FF30093E1A99816EA3422927037FEEE27E6061217D847EA9CD43AD3E795C914CF0452994C1509D8EB7661DBFB2FCDD56; isshowtcc=isshowtcc; BIDCTER_USERNAME=UserName=jingyingbu666; keywords=%u601D%u79D1; keywords==%e6%80%9d%e7%a7%91; CNZZDATA888048=cnzz_eid%3D1104734771-1531464092-%26ntime%3D1531696377; Hm_lvt_9954aa2d605277c3e24cb76809e2f856=1531469210,1531700960; Hm_lpvt_9954aa2d605277c3e24cb76809e2f856=1531701399; aspcn=id=1277449&name=jingyingbu666&vip=3&company=%e8%8b%8f%e4%ba%a4%e7%a7%91%e9%9b%86%e5%9b%a2%e8%82%a1%e4%bb%bd%e6%9c%89%e9%99%90%e5%85%ac%e5%8f%b8&lianxiren=%e6%b8%b8%e7%8e%89%e7%9f%b3&tel=025-86577542&[email protected]&diqu=&Token=65D51EA060022C3EFFD2BE6B4C79852284FE102150132499D822DB4759BA5217232FAA3570FA85C59F0D4B7BA2A98C4B; PASSKEY=Token=65D51EA060022C3EFFD2BE6B4C79852284FE102150132499D822DB4759BA5217232FAA3570FA85C59F0D4B7BA2A98C4B',
            # "Host": "www.fjggzyjy.cn",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",

            # "Content-Type":"application/x-www-form-urlencoded",

            # "Referer":"https://www.bidcenter.com.cn",
            # 'Cookie': 'ASP.NET_SessionId=edvtzkuc3fir5uo0dgd33pwl; UM_distinctid=166e2d98409596-08af12546c38b9-12656e4a-1fa400-166e2d9840a47e; CNZZDATA888048=cnzz_eid%3D758459646-1541404197-%26ntime%3D1541404197; Hm_lvt_9954aa2d605277c3e24cb76809e2f856=1541404198; Hm_lpvt_9954aa2d605277c3e24cb76809e2f856=1541404198',
        }
        # self.proxy_enable = "http://spider-ip-sync.istarshine.net.cn/proxy_100ms.txt"
        #self.proxy_url = 'http://spider-ip-sync.istarshine.net.cn/proxy_100ms.txt'
        self.request_headers = {'headers': self.headers}

        self.conn_config = redis.StrictRedis.from_url('redis://192.168.1.34/1')
        redis_ip = self.conn_config.get("redis_ip")
        redis_db = self.conn_config.get("redis_db")
        mysql_ip = self.conn_config.get("mysql_ip")
        mysql_databases = self.conn_config.get("mysql_databases")
        mysql_username = self.conn_config.get("mysql_username")
        mysql_password = self.conn_config.get("mysql_password")
        mysql_list_info = self.conn_config.get("mysql_list_info")
        try:
            self.conn = redis.StrictRedis.from_url('redis://{0}/{1}'.format(
                redis_ip, redis_db))
        except:
            self.conn = None
        # self.db = DB ().create ('mysql://*****:*****@192.168.20.247:3306/hbdx')
        self.db = DB().create('mysql://{0}:{1}@{2}:3306/{3}'.format(
            mysql_username, mysql_password, mysql_ip, mysql_databases))
        # self.db = DB ().create ('mysql://*****:*****@localhost:3306/sjk')
        self.table = mysql_list_info

    def get_start_urls(self, data=None):
        '''
        Return start_urls
        '''
        return self.start_urls

    def parse(self, response, url):
        # try:
        #     # response.encoding = self.encoding
        #     # unicode_html_body = response.text
        #     # data = htmlparser.Parser(unicode_html_body)
        # except Exception, e:
        #     return ([], None, None)
        url_list = [
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index_3.jhtml',
            # construction project tender announcements
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_4.jhtml',
            # construction project award announcements
        ]

        return (url_list, None, None)

    def parse_detail_page(self, response=None, url=None):
        try:
            response.encoding = self.encoding
            unicode_html_body = response.text
            data = htmlparser.Parser(unicode_html_body)
        except Exception, e:
            return []
        from_tag_url = response.url
        print from_tag_url
        # print unicode_html_body

        zhaobgg = [
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgg/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cggg/index_3.jhtml',
        ]
        zhongbgg = [
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/jgzbgs/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_2.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_3.jhtml',
            'http://zw.hainan.gov.cn/ggzy/ggzy/cgzbgg/index_4.jhtml',
            # construction project award announcements
        ]
        if from_tag_url in zhaobgg:
            tag = "招标公告"  # tender announcement
        elif from_tag_url in zhongbgg:
            tag = "中标公告"  # award announcement
        else:
            tag = "招标公告"
        # titles = re.findall('title="(.*?)"', unicode_html_body)[:-1]
        # dates = re.findall('<td align="center">(.*?)</td>', unicode_html_body)[1::2]
        # links = re.findall('<td align="left"><a href="(.*?)"', unicode_html_body)
        li_content = data.xpathall('''//table[@class="newtable"]//tr''')

        # for title,link,date in zip(titles,links,dates):
        for item in li_content:
            title = item.xpath('''//a/@title''').text().strip()
            link = item.xpath('''//a[@target="_blank"]/@href''').text().strip()
            date = item.xpath('''//td[4]/text()''').text().strip()
            date = str(date).replace("-", "")

            #         titles = data.xpathall('//a[@class="btn btn-default article-list-single"]/@title')
            #         links = data.xpathall('//a[@class="btn btn-default article-list-single"]/@href')
            #         dates = data.xpathall('//span[@class="article-list-date"]/text()')

            if self.getdumps(link):
                continue
            link = str(link)
            uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, link)) + str(
                uuid.uuid3(uuid.NAMESPACE_DNS, link))
            ctime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            location = "海南省"
            service = ''
            industry = ""
            post = {
                "uuid": uid,  # md5
                "detailUrl": link,  # url
                "name": title,  # 标题
                "location": location,  # 地区
                "publicTime": date,  # 公布时间
                "tag": tag,  # 标签
                "site": self.site_domain,
                "siteName": self.siteName,
                "ctime": ctime,
                "industry": industry,
                "service": service
            }

            dic = self.handle_post(post)
            try:
                self.db.table(self.table).add(dic)
            except Exception as e:
                print e

            # str_post = json.dumps (post)

        return
Example #32
def qiniufetch(url, file_name):
    headers = {"user_agent":
                   "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
               }
    if 'http' in url:
        """
            Use a proxy pool
        """
        # image_download = Proxy_contact(app_name='qiniufetch', method='get', url=url, headers=headers)
        # con = image_download.contact()
        # while True:
        #     try:
        #         proxy = proxies[random.randint(0, 9)]
        #         res = requests.get(url, headers=headers, proxies=proxy, timeout=10)  # 图片连接请求
        #         con = res.content
        #         if res.status_code == 200:
        #             break
        #         else:
        #             continue
        #     except Exception as e:
        #         print(e)
        # if con == False:
        #     return None
        try:
            res = requests.get(url, headers=headers, timeout=10)
            con = res.content
        except:
            return False
        with open('article.jpg', 'wb') as f:
            f.write(con)
    else:
        try:
            img_url = 'http:' + url
            res = requests.get(img_url, headers=headers, timeout=10)
            con = res.content
            with open('article.jpg', 'wb') as f:
                f.write(con)
        except:
            log.info('non-standard image format')
            return False

    filename = uuid.uuid3(uuid.NAMESPACE_DNS, file_name)

    # Fill in your Access Key and Secret Key
    access_key = 'qjku2wyeTzY-yXiQ3JuTvkT87kn4OBdrA3VnK46e'
    secret_key = 'JHbwSYk-0e2GqzH8--H-AO9X12BiNYq-qbAdzLY7'
    # Build the authentication object
    q = Auth(access_key, secret_key)
    # Bucket to upload into
    bucket_name = bucket
    # File name to store under after uploading to Qiniu
    key = str(filename)
    # Generate the upload token; an expiry time etc. can be specified
    token = q.upload_token(bucket_name, key, 3600)
    # Local path of the file to upload
    localfile = './article.jpg'
    ret, info = put_file(token, key, localfile)
    # print(info)
    while True:
        try:
            assert ret['key'] == key
            assert ret['hash'] == etag(localfile)
            break
        except:
            continue

    log.info('Uploaded image {} successfully'.format(filename))
    bucket_domain = 'http://image.fangjia.com'
    file_url = bucket_domain + "/" + str(filename)
    return file_url
Example #33
def uuid_hash(address1, city, address2, state, country):
    return uuid.uuid3(uuid.NAMESPACE_DNS,
                      address1 + city + address2 + state + country)
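Note that the fields above are concatenated with no delimiter, so two different address tuples can yield the same input string and therefore the same UUID. A hedged variant that keeps field boundaries intact (a sketch, not the original author's code):

import uuid

def uuid_hash_delimited(address1, city, address2, state, country):
    # Join with a separator character so field boundaries survive concatenation.
    name = '\x1f'.join([address1, city, address2, state, country])
    return uuid.uuid3(uuid.NAMESPACE_DNS, name)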