def annotAll():
    """Interactively sort every "error404" document into one of two URL lists.

    For each document returned by ``collection.find({"status": "error404"})``
    whose ``expanded_url`` is in neither ``urlError404`` nor
    ``urlNotError404``, the URL and its boilerpipe text are printed, the
    stored HTML is written to a temporary file and opened in Firefox, and one
    line is read from stdin:

    * ``"y"`` appends the URL to ``urlError404``;
    * ``"n"`` appends the URL to ``urlNotError404``;
    * anything else stops the session early.

    Both lists are printed at the end, preceded by ``notFinished`` when the
    session was stopped early.
    """
    global urlError404
    global urlNotError404

    query = {"status": "error404"}
    stoppedEarly = False

    # Show how many candidate documents there are before starting.
    print(len(list(collection.find(query))))

    for doc in collection.find(query):
        url = doc["expanded_url"]
        # URLs that already sit in one of the two lists are not asked again.
        if url in urlError404 or url in urlNotError404:
            continue

        print(url)
        extractedText = doc["scrap"]["boilerpipe"]["text"]
        print(len(extractedText))
        print(extractedText)

        # Render the stored page so the annotator can judge it visually.
        sh.firefox(strToTmpFile(doc["html"], ext="html"))

        reply = input()
        if reply == "y":
            urlError404.append(url)
        elif reply == "n":
            urlNotError404.append(url)
        else:
            stoppedEarly = True
            break

    if stoppedEarly:
        print("notFinished")
    print("error404")
    print(urlError404)
    print("noError404")
    print(urlNotError404)
def cheatsheet_exec(filename, ext):
    """Open ``filename + ext`` with the viewer that matches ``ext``.

    Leading/trailing dots are stripped from ``ext`` before matching, and the
    match is case-sensitive:

    * ``pdf``                -> ``evince``
    * ``png``/``jpg``/``jpeg`` -> ``eog``
    * ``html``               -> ``firefox``

    Any other extension is silently ignored. Always returns ``None``.
    """
    viewer_by_suffix = {
        'pdf': 'evince',
        'png': 'eog',
        'jpg': 'eog',
        'jpeg': 'eog',
        'html': 'firefox',
    }
    file_ = filename + ext
    viewer = viewer_by_suffix.get(ext.strip('.'))
    if viewer is None:
        return
    # Resolve the command lazily so only the viewer actually needed is
    # looked up on the system.
    getattr(sh, viewer)(file_)
def amazon_open(self):
    """Open the Amazon page of every item of this collection in Firefox.

    Iterates over ``self``, collects ``self[key].amazon_url`` for each key
    and passes the resulting list to a single ``sh.firefox`` call.

    Returns:
        ``None`` when Firefox was launched, or an error string (nothing is
        opened) when there are more than 200 URLs.
    """
    urls = [self[key].amazon_url for key in self]
    # Safety net: refuse to launch an unreasonable number of tabs.
    if len(urls) > 200:
        return "error: more than 200 results, assuming a mistake has been made ( in amazon_open)"
    sh.firefox(urls)
def crossValidationSeeFailed(self, k=20, openInFirefox=False):
    """Cross-validate the predictor and show which files it misclassifies.

    The training data is split with ``chunks(..., k)``; each chunk is used
    once as the test set while the predictor is fitted on all the others.
    Every misclassified feature vector is mapped back to a file, the
    de-duplicated failed files are printed and, finally, the mean per-fold
    ratio of correctly predicted elements is printed.

    Args:
        k: value forwarded to ``chunks`` for both features and labels.
        openInFirefox: when true, each failed file is also opened in Firefox.
    """
    # Predictor and full training data.
    clf = self.getPredictor()
    (allFeatures, allLabels) = self.getTrainData()

    # Same split for features and labels so they stay aligned.
    featureFolds = chunks(allFeatures, k)
    labelFolds = chunks(allLabels, k)

    misclassified = []
    foldSuccessRatios = []
    for foldIndex, testFeatures in enumerate(featureFolds):
        testLabels = labelFolds[foldIndex]

        # Training set = every fold except the current one.
        trainFeatures = []
        for otherIndex, fold in enumerate(featureFolds):
            if otherIndex != foldIndex:
                trainFeatures.extend(fold)
        trainLabels = []
        for otherIndex, fold in enumerate(labelFolds):
            if otherIndex != foldIndex:
                trainLabels.extend(fold)

        clf.fit(trainFeatures, trainLabels)

        # Predict the held-out elements one at a time, keeping the failures.
        wrongCount = 0
        for position, sample in enumerate(testFeatures):
            expected = testLabels[position]
            predicted = clf.predict([sample])[0]
            if expected != predicted:
                wrongCount += 1
                misclassified.append(sample)

        # Stored value is the share of *correct* predictions for this fold.
        foldSuccessRatios.append(
            (len(testFeatures) - wrongCount) / len(testFeatures)
        )

    # Files are listed 404 first, then ok.
    # NOTE(review): this assumes getTrainData() returns features in that
    # same order -- confirm against its implementation.
    allFiles = sortedGlob(self.pattern404) + sortedGlob(self.patternOk)

    # index() returns the first equal feature vector, so identical vectors
    # all resolve to the same file.
    failedFiles = [allFiles[allFeatures.index(sample)] for sample in misclassified]
    failedFiles = list(set(failedFiles))
    printLTS(failedFiles)

    if openInFirefox:
        import sh
        for failedFile in failedFiles:
            sh.firefox(failedFile)

    # Mean of the per-fold success ratios.
    print(sum(foldSuccessRatios) / len(foldSuccessRatios))
def cover_browser(self, width=300, link_to='amazon'):
    """Render the cover gallery to a temporary page and open it in Firefox.

    The HTML produced by ``self.cover_html(width=width, link_to=link_to)`` is
    written to ``temp.htm`` in the current working directory, opened with
    Firefox and removed again.

    Args:
        width: forwarded to ``cover_html``.
        link_to: forwarded to ``cover_html``; pass ``"help"`` to get the list
            of accepted values instead of opening anything.

    Returns:
        The options string when ``link_to == 'help'``, otherwise ``None``.
    """
    if link_to == 'help':
        return (
            "options: amazon libgen goodreads thehiddenbay youtube bing-image-search"
        )

    html_str = self.cover_html(width=width, link_to=link_to)
    with open('temp.htm', 'w') as f:
        f.write(html_str)

    try:
        sh.firefox('temp.htm')
        # Presumably gives Firefox time to load the page before it is deleted.
        time.sleep(1)
    finally:
        # Always clean up, even when launching Firefox fails, so a stale
        # temp.htm is never left behind in the working directory.
        os.remove('temp.htm')
def start_browser(url, browser="chrome", size="1024,768", open_devtools=False, user_agent=None):
    """Launch Chrome or Firefox on ``url`` with a fresh, cache-less profile.

    The profile lives in ``<cwd>/.<browser>`` and is wiped before every
    launch. Any browser other than ``"firefox"`` or ``"chrome"`` falls back
    to Chrome (the profile directory keeps the originally requested name).

    Args:
        url: page to open.
        browser: ``"chrome"`` or ``"firefox"``.
        size: window size as ``"width,height"``.
        open_devtools: when exactly ``True``, Chrome opens its devtools
            automatically; not used by the Firefox branch.
        user_agent: optional user-agent override.
    """
    print("Starting browser %s..." % browser)

    profile_dir = os.getcwd() + "/." + browser
    try:
        shutil.rmtree(profile_dir)
    except OSError:
        # Best effort: the directory usually just does not exist yet.
        pass

    if browser not in ["firefox", "chrome"]:
        print("browser %s not yet supported, falling back to chrome" % browser)
        browser = "chrome"

    if browser == "chrome":
        chrome_args = [
            "--no-first-run",
            "--disk-cache-dir=/dev/null",
            "--disk-cache-size=1",
            "--window-size=%s" % size,
            "--user-data-dir=%s" % profile_dir,
            "--disable-popup-blocking",
            "--media-cache-size=1",
        ]
        if open_devtools is True:
            chrome_args += ["--auto-open-devtools-for-tabs"]
        if user_agent is not None:
            chrome_args += ["--user-agent=%s" % user_agent]
        chrome_args += ["--app=%s" % url]
        sh.google_chrome(*chrome_args)
        return

    # Firefox: create the profile, drop the preferences in, then launch.
    os.makedirs(profile_dir)
    sh.firefox("--no-remote", "-CreateProfile", "replay " + profile_dir)

    base_prefs = [
        'user_pref("dom.disable_open_during_load", false);\n',
        'user_pref("datareporting.healthreport.service_enabled", false);\n',
        'user_pref("browser.cache.disk.enable", false);\n',
        'user_pref("browser.cache.disk.smart_size.enabled", false);\n',
        'user_pref("browser.cache.disk.capacity", 0);\n',
    ]
    with open(os.path.join(profile_dir, "prefs.js"), "w") as prefs_file:
        for pref_line in base_prefs:
            prefs_file.write(pref_line)
        if user_agent is not None:
            prefs_file.write('user_pref("general.useragent.override", "%s");\n' % user_agent)

    size_parts = size.split(",")
    firefox_args = [
        "--no-remote",
        "--new-window",
        "--new-instance",
        "-width", "%s" % size_parts[0],
        "-height", size_parts[1],
        "--profile", profile_dir,
        "-url", url,
    ]
    sh.firefox(*firefox_args)
#!/usr/bin/env python
# Print the configuration of the eth0 interface, then open google.com in
# Firefox.
import os
import sys

import sh

# The call form of print works with a single argument on both Python 2 and
# Python 3; the bare ``print x`` statement is a SyntaxError on Python 3.
print(sh.ifconfig("eth0"))
sh.firefox("google.com")
#!/usr/bin/env python
# Print the configuration of the eth0 interface, then open google.com in
# Firefox.
import os
import sys

import sh

# The call form of print works with a single argument on both Python 2 and
# Python 3; the bare ``print x`` statement is a SyntaxError on Python 3.
print(sh.ifconfig("eth0"))
sh.firefox("google.com")
def play(self):
    """Open Firefox on a YouTube search for this song.

    The query is the artist and the title, each wrapped in double quotes so
    that each is searched as an exact phrase. The query is URL-encoded so
    that characters such as ``&``, ``#``, ``+`` or ``%`` in ``self.artist``
    or ``self.title`` cannot break or truncate the search URL.
    """
    # Function-scope import keeps this block self-contained.
    try:
        from urllib.parse import quote_plus
    except ImportError:  # Python 2
        from urllib import quote_plus

    youtube_str = 'https://www.youtube.com/results?search_query='
    # Collapse runs of spaces so each run becomes a single "+" once encoded.
    artist = re.sub('[ ]+', ' ', self.artist)
    title = re.sub('[ ]+', ' ', self.title)
    song_str = quote_plus('"%s" "%s"' % (artist, title))
    sh.firefox(youtube_str + song_str)