Esempio n. 1
0
def test_eval(dir):
    """Run the billing pipeline for one test case and check its output.

    Points the shared ``data`` configuration at the table names derived from
    ``dir``, calls ``prepare_consolidated_billing`` and ``dump_result``, then
    compares ``tests/<dir>/output_cmp.json`` with
    ``tests/<dir>/expected_output.json`` by content.

    Args:
        dir: Test case name. It prefixes every table name and names the
            sub-directory of ``tests/``. A name ending in ``_b`` selects
            CUD cost attribution option ``'b'``; anything else selects ``'a'``.

    Raises:
        AssertionError: If the two JSON files differ.
    """
    banner = "*" * 60
    print("\n" + banner)
    print("Testing test case " + dir)

    data['corrected_table_name'] = dir + "_corrected"
    data['distribute_commitments_table'] = dir + "_distribute_commitment"
    data['billing_export_table_name'] = (
        data['billing_export_dataset_id'] + '.' + dir + "_export")
    data['load_billing_export_table_name'] = dir + "_export"
    data['commitment_table_name'] = dir + "_commitment"
    data['temp_commitments_table_name'] = dir + "_commitment"
    data['project_label_credit_breakout_table'] = dir + "_project_label_credit"
    data['cud_cost_attribution_option'] = 'b' if dir.endswith('_b') else 'a'

    prepare_consolidated_billing(dir, data)
    case_dir = "tests/" + dir
    dump_result(data['project_id'], data['corrected_dataset_id'],
                data['corrected_table_name'], case_dir)
    matches = filecmp.cmp(case_dir + "/output_cmp.json",
                          case_dir + "/expected_output.json",
                          shallow=False)

    # Report the failure before raising: previously an assert placed ahead of
    # the try block made the FAILED message unreachable.
    if not matches:
        print("\n" + 'Test case ' + dir + ' ... FAILED')
    assert matches, "output of test case " + dir + " differs from expected"

    # As before, the test tables are only cleaned up after a successful run.
    clean(dir, data)
    print("\n" + 'Test case ' + dir + ' ... PASSED')
Esempio n. 2
0
    def getShare(self, shareID, r=None, asAdmin=False):
        """Load the share stored under ``shareID``.

        Returns a ``(share, hint)`` pair:
          * ``(None, None)`` when no share file exists,
          * ``(None, "Share has expired")`` for an expired share requested
            without ``asAdmin``,
          * ``(None, <exception>)`` when loading fails,
          * ``(share, "ok")`` otherwise.

        When ``r`` is given, the ``t`` URL parameter of ``r.url`` is stored
        on the returned share as ``tag``.
        """
        sharePath = h.makePath(self.sharesPath, shareID)
        if not os.path.exists(sharePath):
            return None, None

        lockFile = h.makePath(h.LOCKS_FOLDER,
                              "_sfl_share%s" % h.clean(shareID))
        lockHandle = None
        try:
            lockHandle = h.getLockShared(lockFile, 5)
            shareJson = h.loadJsonFile(sharePath)

            # Older share files store a single path under "file".
            files = shareJson.get("files", None)
            if files is None:
                single = shareJson.get("file", None)
                files = None if single is None else [single]

            loaded = share(shareJson["ID"], shareJson["creation"], files,
                           shareJson.get("views", []),
                           shareJson.get("password"),
                           shareJson.get("duration", 0))
            expired = (not asAdmin and loaded.duration > 0
                       and loaded.duration + loaded.creation < h.now())

            # Release the lock early; the finally clause then has nothing to do.
            h.releaseLock(lockHandle)
            lockHandle = None

            if expired:
                return None, "Share has expired"
            if r is not None:
                loaded.tag = h.getURLParams(r.url).get("t", None)
            return loaded, "ok"
        except:
            lastError, _trace = h.getLastExceptionAndTrace()
            return None, lastError
        finally:
            if lockHandle is not None:
                h.releaseLock(lockHandle)
Esempio n. 3
0
    def getPasswords(self, path):
        """Return the set of passwords listed in ``path``'s ``.password`` file.

        An empty set is returned when the file does not exist. Results are
        kept in ``self.passwordsCache`` together with the file's modification
        value and reused while that value is unchanged.
        """
        path = h.cleanPath(path)
        passwordsFile = h.makePath(self.basePath, path, ".password")
        if not os.path.exists(passwordsFile):
            return set()

        cacheKey = h.makeKeyFromArguments(path)
        lockFile = h.makePath(h.LOCKS_FOLDER,
                              "_sfl_password_%s" % h.clean(path))
        lockHandle = None
        try:
            self.passwordsLock.acquire()

            # Serve from the cache while the file has not been modified.
            if cacheKey in self.passwordsCache:
                cached = self.passwordsCache[cacheKey]
                if h.getFileModified(passwordsFile) == cached["date"]:
                    return cached["passwords"]

            lockHandle = h.getLockShared(lockFile, 5)
            content = h.readFromFile(
                h.makePath(self.basePath, path, ".password"))
            passwords = {line for line in content.split("\n") if line != ""}
            self.passwordsCache[cacheKey] = {
                "passwords": passwords,
                "date": h.getFileModified(passwordsFile)
            }
            return passwords
        finally:
            self.passwordsLock.release()
            if lockHandle is not None:
                h.releaseLock(lockHandle)
Esempio n. 4
0
 def setUserPassword(self, path, password, r, response=None):
     """Store ``password`` for ``path`` in the request cookies.

     The cookie is named ``_sf_pass_<cleaned path>``, with ``-`` standing in
     for an empty path. When ``response`` is given, the cookie is also set
     on it with a lifetime of ``COOKIE_DURATION``.
     """
     cleanedPath = h.cleanPath(path)
     cookieKey = "_sf_pass_%s" % h.clean(
         "-" if cleanedPath == "" else cleanedPath)

     # Replace the request's cookies with a plain dict copy before writing.
     r.cookies = dict(r.cookies)
     r.cookies[cookieKey] = password

     if response is None:
         return
     response.set_cookie(cookieKey, password, max_age=COOKIE_DURATION)
Esempio n. 5
0
 def addShare(self, shareID, paths, duration, password):
     """Create a share and write it to disk.

     Leading and trailing slashes are stripped from every path and a
     ``None`` password is stored as an empty string. The share file is
     written under an exclusive lock and handed to ``self.user`` when that
     is set.

     Returns ``(share, "ok")`` on success or ``(None, <exception>)`` when
     anything fails.
     """
     paths = [p.strip("/") for p in paths]
     sharePath = h.makePath(self.sharesPath, shareID)
     if password is None:
         password = ""

     lockFile = h.makePath(h.LOCKS_FOLDER,
                           "_sfl_share%s" % h.clean(shareID))
     lockHandle = None
     try:
         lockHandle = h.getLockExclusive(lockFile, 5)
         created = share(shareID, h.now(), paths, [], password, duration)
         payload = {
             "ID": created.ID,
             "files": created.files,
             "creation": created.creation,
             "views": created.views,
             "duration": created.duration,
             "password": created.password
         }
         h.writeJsonFile(sharePath, payload)
         if self.user is not None:
             h.changeFileOwner(sharePath, self.user)
         return created, "ok"
     except:
         lastError, _trace = h.getLastExceptionAndTrace()
         return None, lastError
     finally:
         if lockHandle is not None:
             h.releaseLock(lockHandle)
Esempio n. 6
0
def getDefaultAxis(**kwargs):
    """Build the ``axis`` XML element from defaults and caller overrides.

    Attributes start from ``default_['axis']`` and are merged with the
    ``axis_attrib`` keyword. Any attribute that also appears as a truthy
    keyword argument is then replaced by the cleaned keyword value;
    ``title``, ``xlabel`` and ``ylabel`` are additionally wrapped in braces.
    """
    global default_
    braced = ['title', 'xlabel', 'ylabel']

    attribs = helper.keyvalToDict(default_['axis'])
    # Layer the user-specified attribute string on top of the defaults.
    attribs = helper.keyvalToDict(kwargs.get('axis_attrib', ''), attribs)

    # Explicit keyword arguments win over both layers.
    for name in attribs:
        if not kwargs.get(name):
            continue
        value = '%s' % helper.clean(kwargs[name])
        if name in braced:
            value = '{ %s }' % helper.clean(kwargs[name])
        attribs[name] = value

    return ET.Element('axis', **attribs)
Esempio n. 7
0
    def test_cleanRmFile(self):
        ''' Tests if the file is removed. '''
        filename = 'clean_test_file'
        with open(filename, 'w+') as file:
            file.write("Testing clean method.")

        helper.clean(filename)

        # assertFalse takes (expr, msg); the old second argument was used as
        # the failure message, not as an expected value.
        self.assertFalse(os.path.isfile(filename))
Esempio n. 8
0
def getContracts(cur, addresses):
    """Fetch contract sources for new addresses and store them.

    For every address for which ``queries.selectAddrContracts`` returns 0,
    the source code is requested from the etherscan.io API. Non-empty
    sources are stripped of comments, saved to a file, turned into an AST
    and inserted into the database.

    Returns the number of contracts that were inserted.
    """
    # NOTE(review): the API key is embedded in the URL below.
    endpoint = 'https://api.etherscan.io/api?module=contract&action=getsourcecode&address={}&apikey=RMo8wU2K53Mm'
    transferred = 0
    inserted = 0

    for addr in addresses:
        # Skip addresses that are already in the database.
        if queries.selectAddrContracts(cur, addr) != 0:
            continue

        page = helper.getPage(endpoint.format(addr))

        # A string result carries no contract record.
        res = page.json()['result'][0]
        if isinstance(res, str):
            continue

        source = res['SourceCode']
        if len(source) == 0:
            continue

        contract = helper.removeComments(source)
        codesize = len(contract)

        # The AST is created by a script from the saved file.
        helper.saveToFile(addr, contract)
        ast = helper.createAST(addr)

        if queries.insertToContracts(cur, addr, codesize, contract, ast,
                                     transferred):
            inserted += 1
            helper.clean(addr)

    return inserted
Esempio n. 9
0
 def removeShare(self, shareID):
     """Delete the share file for ``shareID`` under an exclusive lock.

     Returns ``True`` once the file has been removed and raises
     ``Exception("Unknown share", shareID)`` when no such file exists.
     """
     sharePath = h.makePath(self.sharesPath, shareID)
     if not os.path.exists(sharePath):
         raise Exception("Unknown share", shareID)

     lockFile = h.makePath(h.LOCKS_FOLDER,
                           "_sfl_share%s" % h.clean(shareID))
     lockHandle = None
     try:
         lockHandle = h.getLockExclusive(lockFile, 5)
         os.remove(sharePath)
         return True
     finally:
         if lockHandle is not None:
             h.releaseLock(lockHandle)
Esempio n. 10
0
 def _routeShareAdd(self, paths):
     """Handle the "add share" page for the given encoded paths.

     Non-admin requests are redirected to ``/admin`` and forbidden paths
     render the ``forbidden`` template. ``paths`` is decoded and split on
     ``@@@``. When one of the submit fields is present, a share is created
     unless the ID is empty after cleaning, or already exists and the
     forced-submit field was not used. On success the shares page is
     rendered; otherwise the ``share-add`` form is rendered again with the
     collected alerts.
     """
     if not self.ap.isAdmin(request): return self._redirect("/admin")
     if self.ap.shareForbidden(paths):
         return self._makeTemplate("forbidden", path=paths)
     alerts = []
     paths = h.decode(paths)
     paths = paths.split("@@@")
     shareID = request.form.get("shareID", "")
     defaultShareID = request.form.get("defaultShareID", h.uniqueIDSmall())
     duration = request.form.get("duration", "")
     # The form supplies the duration in days.
     durationInSecs = h.parseInt(duration, 0) * 24 * 60 * 60
     password = request.form.get("password-share-add", "")
     shareSubmit = request.form.get("create-share-submit", False)
     shareForceSubmit = request.form.get("create-share-force-submit", False)
     needForce = False
     containers = [p for p in paths if self.ip.isItemContainer(p)]
     if shareSubmit or shareForceSubmit:
         if shareID == "": shareID = defaultShareID
         shareID = h.clean(shareID)
         if shareID == "":
             alerts.append(
                 ["Can't create Share", "Share ID provided is invalid."])
         elif not self.sp.shareExists(shareID) or shareForceSubmit:
             # Use the same provider instance (self.sp) that creates the
             # share; the bare name `sp` was used here before.
             share, hint = self.sp.addShare(shareID, paths,
                                            durationInSecs, password)
             if share is not None:
                 return self._routeShares(alerts, shareAdded=share)
             alerts.append([
                 "Can't create Share",
                 "Share %s could not be created. Hint: %s" %
                 (shareID, hint)
             ])
         else:
             alerts.append([
                 "Can't create Share",
                 "The Share ID %s is already used for %s." %
                 (shareID, ", ".join(paths))
             ])
             # Ask the user to confirm overwriting the existing share.
             needForce = True
     return self._makeTemplate("share-add",
                               paths=paths,
                               defaultShareID=defaultShareID,
                               shareID=shareID,
                               duration=duration,
                               alerts=alerts,
                               needForce=needForce,
                               containers=containers)
Esempio n. 11
0
 def getLinks(pageUrl):
     """Recursively collect links reachable from ``pageUrl``.

     The page is downloaded and parsed; every ``href`` not yet in the global
     ``pages`` set that ``helper.valid`` accepts for the page's domain is
     added to ``pages``, printed, and crawled in turn.

     NOTE(review): recursion depth is unbounded, so a deep site can hit
     Python's recursion limit.
     """
     global pages
     # Use the function's own argument; the body previously read an
     # undefined name `url`, so recursive calls never crawled the new page.
     cleanUrl = helper.clean(pageUrl)
     domain = helper.get_domain(cleanUrl)
     html = urlopen(cleanUrl)
     bsObj = BeautifulSoup(html, "lxml")
     for link in bsObj.findAll("a"):
         if 'href' not in link.attrs:
             continue
         newPage = link.attrs['href']
         if newPage in pages:
             continue
         # We have encountered a new page
         if helper.valid(newPage, domain):
             pages.add(newPage)
             print("%sLink Founds >> %s" % (W, G), newPage)
             getLinks(newPage)
Esempio n. 12
0
    def crawLink(self, link, mode, outputFile, gd):
        """Collect the links found at ``link`` and log them to ``gd``.

        ``mode`` selects the parser's link mode: ``"accident"`` maps to
        ``"search_result"`` and ``"ordinary"`` to ``"same_side"``.
        ``outputFile`` is accepted but not used here.
        """
        target_link = clean(link)
        if mode == "accident":
            link_mode = "search_result"
        elif mode == "ordinary":
            link_mode = "same_side"

        banner = "Craw link for: "+target_link
        print(banner)
        gd.write("**** "+banner+"\n")

        found = self.parser.run(target_link, "link", link_mode)
        if found is None:
            return
        for item in found:
            if item is None:
                continue
            gd.write(" + "+item+"\n")
Esempio n. 13
0
def info():
    """Classify the hadith submitted in a POST form.

    The text is cleaned, scanned for narrators, turned into a feature
    vector, projected with ``pca`` and scored by ``nn``. The result is
    returned as a JSON string; any other request method gets JSON ``null``.
    """
    if request.method != 'POST':
        return json.dumps(None)

    cleaned = hp.clean(request.form['hadith']).strip()
    found_narrators = hp.scan_narrator(narrators, cleaned)

    features = np.array([hp.tfbinary(narrators, found_narrators)])
    reduced = np.array(pca.transform(features))
    prediction = nn.predict(reduced)[0]

    payload = {
        'result': prediction,
        'narrators': found_narrators.tolist(),
        'cleaned_hadith': cleaned
    }
    return json.dumps(payload)
Esempio n. 14
0
 def handle_starttag(self, tag, attrs):
     '''
     Override of the default handler, called for every start tag.

     Each ``href`` pointing at a static file is printed. Any other ``href``
     on an <a> tag is resolved against ``self.url``, cleaned, and appended
     to ``self.urls`` when it is valid for ``self.domain``.
     TODO: update this comment when asset handling is done
     '''
     for key, val in attrs:
         if key != "href":
             continue
         if contain_static(val):
             # static files are only reported
             print("-", val)
             continue
         if tag != "a":
             continue
         # resolve relative paths against the root url, then normalise
         resolved = clean(urljoin(self.url, val))
         if valid(resolved, self.domain):
             self.urls.append(resolved)
Esempio n. 15
0
    def crawl(self, target_url):
        """Crawl outward from ``target_url`` until no unvisited url is left.

        Urls are taken from the front of ``self.to_visit``, parsed with
        ``self.parser.run`` and recorded in ``self.visted``. Urls returned by
        the parser are queued unless already visited or already queued.
        """
        target_url = clean(target_url)
        self.to_visit.append(target_url)

        while self.to_visit:
            current = self.to_visit.pop(0)
            print("The spider is visiting:", current)
            discovered = self.parser.run(current)
            self.visted.add(current)

            # Queue only urls that are neither visited nor already pending.
            for candidate in discovered:
                if candidate in self.visted or candidate in self.to_visit:
                    continue
                self.to_visit.append(candidate)

        print("The spider has finished crawling the web at {url}".format(
            url=target_url))
Esempio n. 16
0
 def addNewPassword(self, path, password):
     """Add ``password`` to the ``.password`` file of ``path``.

     Returns ``False`` when ``password`` is ``None``, when editing passwords
     is forbidden for ``path``, or when writing fails (the error is
     printed). Returns ``True`` after the de-duplicated list is written.
     """
     path = h.cleanPath(path)
     if password is None:
         return False
     if self.passwordEditForbidden(path):
         return False

     lockFile = h.makePath(h.LOCKS_FOLDER,
                           "_sfl_password_%s" % h.clean(path))
     lockHandle = None
     try:
         passwordFile = h.makePath(self.basePath, path, ".password")
         # The current passwords are read before the exclusive lock is taken.
         known = list(self.getPasswords(path))
         known.append(password)
         lockHandle = h.getLockExclusive(lockFile, 5)
         unique = list(set(known))
         h.writeToFile(passwordFile, "\n".join(unique))
         return True
     except:
         print(h.getLastExceptionAndTrace())
         return False
     finally:
         if lockHandle is not None:
             h.releaseLock(lockHandle)
Esempio n. 17
0
    def handle_starttag(self, tag, attrs):
        """Collect crawlable links from the ``href`` attribute of a start tag.

        Only active while ``self.whatcrawling_mode`` is ``"link"`` and fewer
        than ``NUM_LINK_EACH_PAGE`` links have been collected. ``tag`` is not
        checked, so an ``href`` on any element is considered.

        NOTE(review): the rest of this method is cut off in this view.
        """

        if self.whatcrawling_mode == "link":              # only run in link-crawling mode
            if self.num_link < NUM_LINK_EACH_PAGE:
                # Look at every href attribute of the tag
                for key, val in attrs:
                    if key == "href":
                        if containStatic(val):
                            pass
                        else:                                     # handle links
                            link = urljoin(self.link, val)        # resolve relative paths against self.link
                            link = clean(link)                    # clean up link
                            
                            # "search_result" accepts links outside the excepted domains;
                            # "same_side" accepts links on self.domain.
                            if (self.link_mode == "search_result" and not checkIsExceptDomain(link)) or (self.link_mode == "same_side" and sameDomain(link, self.domain)):
                                # de-duplicate case-insensitively
                                if link.lower() not in self.arr_crawled:
                                    self.arr_links.append(link)       # append link to the return list
                                    self.num_link += 1
                                    self.arr_crawled.append(link.lower())
                

        '''
Esempio n. 18
0
 def saveShare(self, s: share):
     """Write the share ``s`` to its file under an exclusive lock.

     The file is handed to ``self.user`` when that is set. Returns ``True``
     on success and ``False`` when anything fails.
     """
     lockFile = h.makePath(h.LOCKS_FOLDER,
                           "_sfl_share%s" % h.clean(s.ID))
     lockHandle = None
     try:
         lockHandle = h.getLockExclusive(lockFile, 5)
         sharePath = h.makePath(self.sharesPath, s.ID)
         payload = {
             "ID": s.ID,
             "files": s.files,
             "creation": s.creation,
             "views": s.views,
             "duration": s.duration,
             "password": s.password
         }
         h.writeJsonFile(sharePath, payload)
         if self.user is not None:
             h.changeFileOwner(sharePath, self.user)
         return True
     except:
         _lastError, _trace = h.getLastExceptionAndTrace()
         return False
     finally:
         if lockHandle is not None:
             h.releaseLock(lockHandle)
Esempio n. 19
0
 def test_cleanDiffTypes(self):
     ''' Testing if clean handles if it gets a directory. '''
     actual_dir = helper.clean('test_dir')
     # assertFalse takes (expr, msg); the old second argument was used as
     # the failure message, not as an expected value.
     self.assertFalse(actual_dir)
Esempio n. 20
0
 def test_cleanFileNonexisting(self):
     ''' Tests if it crashes if the file does not exist. '''
     actual = helper.clean('clean_test_nofile')
     # assertFalse takes (expr, msg); the old second argument was used as
     # the failure message, not as an expected value.
     self.assertFalse(actual)
Esempio n. 21
0
 def tearDownClass(cls):
     """Class-level teardown hook: calls ``clean()`` with no arguments.

     NOTE(review): no ``@classmethod`` decorator is visible in this view;
     confirm it is present in the enclosing class.
     """
     clean()
                reviewcount = 0
                successcount = 0

                #iterable = readable_data.find_all('review')
                iterable = readable_data.findall('review')

                #for review in iterable:
                for review in iterable:

                    reviewcount += 1

                    # write review to file
                    try:
                        review_text = review.find('review_text').text
                        review_text = helper.clean(
                            review_text
                        )  # remove newlines, quotation marks, unicode
                        review_text = review_tokenizer.cleanOnereview(
                            review_text, removesinglewords=False
                        )  # tokenize into a list of sentences
                        review_text = " <eos> ".join(
                            review_text)  # join with <eos> tags
                        # TODO: evaluate sentiment classification performance when using <eos> tag

                        if include_title:
                            review_title = review.find('title').text
                            review_title = helper.clean(review_title)
                            review_title = review_tokenizer.cleanOnereview(
                                review_title, removesinglewords=False)
                            review_title = " ".join(review_title)
Esempio n. 23
0
def _select_guesser(model_type, v_type, smooth_value, text_body):
    """Return a ``tweet -> result`` callable for ``model_type``, or ``None``."""
    if model_type == 1:
        unigram_map = unigram.generate_unigram(v0, v_type, smooth_value)

        def guess(tweet):
            return unigram.make_guess(tweet, unigram_map)
    elif model_type == 2:
        bigram_map = bigram.generate_bigram(v0, v_type, smooth_value)

        def guess(tweet):
            return bigram.make_guess(tweet, bigram_map)
    elif model_type == 3:
        trigram_map = trigram.generate_trigram(v0, v_type, smooth_value)

        def guess(tweet):
            return trigram.make_guess(tweet, trigram_map)
    elif model_type == 4:
        # The combined model ignores smooth_value and uses a fixed value.
        byom_smoothing = 0.00000000000001
        bigram_map = bigram.generate_bigram(v0, v_type, byom_smoothing)
        trigram_map = trigram.generate_trigram(v0, v_type, byom_smoothing)
        emojis_count = helper.generate_emoji_count(text_body)

        def guess(tweet):
            return byom.make_guess(tweet, bigram_map, trigram_map,
                                   emojis_count)
    else:
        return None
    return guess


def run_model(training_path, test_path, v_type, model_type, smooth_value):
    """Train the selected model, score the test tweets and report accuracy.

    A trace file named ``trace_<v_type>_<model_type>_<smooth_value>.txt`` is
    written with one entry per tweet, then ``generate_trace_file.generate_eval``
    is called and the totals are printed.

    Args:
        training_path: Path of the training file given to ``parseFile``.
        test_path: Path of the test file given to ``helper.clean``.
        v_type: Vocabulary type forwarded to the model builders.
        model_type: 1 = unigram, 2 = bigram, 3 = trigram, 4 = combined
            ``byom`` model. Any other value scores no tweets.
        smooth_value: Smoothing value for model types 1 to 3.
    """
    trace_name = ("trace_" + str(v_type) + "_" + str(model_type) + "_" +
                  str(smooth_value) + ".txt")
    total = 0
    correct = 0

    # The context manager closes the trace file even when a model call raises.
    with open(trace_name, "w", encoding="utf-8") as trace_file:
        text_body = parseFile(training_path)
        generate_v(text_body, v_type)

        tweets = helper.clean(v_type, test_path)

        guess = _select_guesser(model_type, v_type, smooth_value, text_body)
        if guess is not None:
            for tweet in tweets:
                result = guess(tweet)
                generate_trace_file.genetate_trace_file(trace_file, result)
                generate_trace_file.count_result(result)
                total += 1
                if result['isCorrect']:
                    correct += 1

    # Avoid ZeroDivisionError when there are no tweets or the model type is
    # unknown.
    accuracy = correct / total if total else 0.0
    generate_trace_file.generate_eval(v_type, model_type, smooth_value,
                                      accuracy, total)
    print('total: ', total)
    print('accuracy: ', accuracy)
    print(accuracy)
Esempio n. 24
0
 def test_clean(self):
     """Check that clean() drops fragments and trailing slashes and adds a scheme."""
     assert clean("http://google.com/#123123") == "http://google.com"
     assert clean("gocardless.com/") == "http://gocardless.com"
     # Call form of print: the Python 2 statement is a SyntaxError on Python 3.
     print("[Test] test_clean() pass")
Esempio n. 25
0
 def crawContent(self, link):
     """Return what the parser yields for ``link`` in ``"content"`` mode."""
     target_link = clean(link)
     print("Craw content for: "+target_link)
     return self.parser.run(target_link, "content", "")
def readData(user, helper, path_to_data):
    """Load and normalise one user's Facebook, LinkedIn and Twitter data.

    Args:
        user: User identifier; files are looked up as ``U<user>``.
        helper: Object providing ``easyClean``, ``stem``, ``removeStopwords``
            and ``clean`` for text normalisation.
        path_to_data: Directory containing the ``Facebook``, ``LinkedIn`` and
            ``Twitter`` sub-directories.

    Returns:
        A dict with three keys: ``"facebookData"`` (the cleaned and stemmed
        text), ``"linkedInData"`` (a dict of extracted fields, each cleaned,
        stemmed and stop-word filtered) and ``"twitterData"`` (a dict whose
        ``"tweets"`` list holds the cleaned text of reply tweets).
    """

    def read_text(path):
        # Close the file deterministically instead of leaking the handle.
        with open(path) as handle:
            return handle.read()

    def normalise(text):
        return helper.removeStopwords(helper.stem(helper.easyClean(text)))

    def first_or_empty(matches):
        return matches[0] if matches else ""

    fbData = read_text(path_to_data + "/Facebook/U" + str(user) + ".txt")
    fbData = helper.easyClean(fbData)
    fbData = helper.stem(fbData)

    linkedInData = read_text(
        path_to_data + "/LinkedIn/U" + str(user) + ".html")

    # Raw strings keep the patterns identical while avoiding the invalid
    # escape sequences ("\/", "\S") of the plain literals.
    title_pattern = re.compile(r'<p.*class="title"*.>(.*?)<\/p>')
    industry_pattern = re.compile(r'<a.*?name="industry".*?>(.*?)<\/a>')
    description_pattern = re.compile(
        r'dir="ltr" class="description">([\S\s]*?)<\/p>')
    interrests_pattern = re.compile(
        r'<li><a title="Find users with this keyword" href=".*?">([\S\s]*?)<\/a>')
    skills_pattern = re.compile(r'data-endorsed-item-name="(.*?)">')
    schools_pattern = re.compile(r'school-name">(.*?)<\/a>')
    majors_pattern = re.compile(r'<span class="major"><a .*?>(.*?)<\/a>')
    positions_pattern = re.compile(
        r'<a title="Learn more about this title" href=.*?>(.*?)<\/a>')
    job_descriptions_pattern = re.compile(
        r'<p dir="ltr" class="description summary-field-show-more">(.*?)<\/p>')
    # This one might not be needed
    others_also_viewed_people_in_pattern = re.compile(
        r'<p class="browse-map-title">(.*?)<\/p>')

    # Single-valued fields keep the first match, or "" when nothing matches;
    # the remaining fields keep every match.
    linkedInExtractions = {
        "industry": first_or_empty(industry_pattern.findall(linkedInData)),
        "title": first_or_empty(title_pattern.findall(linkedInData)),
        "description": first_or_empty(
            description_pattern.findall(linkedInData)),
        "interrests": interrests_pattern.findall(linkedInData),
        "skills": skills_pattern.findall(linkedInData),
        "schools": schools_pattern.findall(linkedInData),
        "majors": majors_pattern.findall(linkedInData),
        "positions": positions_pattern.findall(linkedInData),
        "jobDescriptions": job_descriptions_pattern.findall(linkedInData),
        # This one might not be needed
        "othersAlsoViewed":
            others_also_viewed_people_in_pattern.findall(linkedInData),
    }

    for key, value in linkedInExtractions.items():
        if isinstance(value, str):
            linkedInExtractions[key] = normalise(value)
        elif isinstance(value, list):
            linkedInExtractions[key] = [normalise(item) for item in value]

    path_to_tweets = path_to_data + "/Twitter/U" + str(user)
    tweetTexts = []

    # Read tweets
    for twitterJsonFile in os.listdir(path_to_tweets):
        if twitterJsonFile == ".DS_Store":
            continue
        try:
            with open(path_to_tweets + "/" + twitterJsonFile) as handle:
                tweet_data = json.load(handle)
            # NOTE(review): the upper bound skips the last entry of each
            # file; kept as-is, confirm whether that is intentional.
            for i in range(0, len(tweet_data) - 1):
                if tweet_data[i]["in_reply_to_status_id"] is not None:
                    tweetTexts.append(helper.clean(tweet_data[i]["text"]))
        except Exception:
            # Best effort: unreadable or malformed tweet files are skipped.
            pass

    twitterExtractions = {"tweets": tweetTexts}

    return {"linkedInData": linkedInExtractions, "facebookData": fbData,
            "twitterData": twitterExtractions}
Esempio n. 27
0
 def tearDownClass(cls):
     """Class-level teardown hook: calls ``clean()`` with no arguments.

     NOTE(review): no ``@classmethod`` decorator is visible in this view;
     confirm it is present in the enclosing class.
     """
     clean()
Esempio n. 28
0
 def crawTitle(self, link):
     """Return what the parser yields for ``link`` in ``"title"`` mode."""
     target_link = clean(link)
     print("Craw title for: "+target_link)
     return self.parser.run(target_link, "title", "")
Esempio n. 29
0
 def getUserPassword(self, path, r: request):
     """Return the password cookie stored for ``path``, or ``None``.

     The cookie is named ``_sf_pass_<cleaned path>``, with ``-`` standing in
     for an empty path.
     """
     cleanedPath = h.cleanPath(path)
     keySuffix = h.clean("-" if cleanedPath == "" else cleanedPath)
     cookieKey = "_sf_pass_%s" % keySuffix
     return r.cookies[cookieKey] if cookieKey in r.cookies else None