def uniq_pos(f1,f2):
	nn=0
	nn1=0
	nn2=0
	outff=open(f2,"w")
	pos10=""
	pos20=""
	for line in fileinput.input(f1):
		nn+=1
		l=line.split()
		pos=l[0]
		sign=pos[4]
		if sign=="+":
			if pos!=pos10:
				nn1+=1
				outff.write("%s"%(line))
				pos10=pos
		elif sign=="-":
			if pos!=pos20:
				nn2+=1
				outff.write("%s"%(line))
				pos20=pos
	fileinput.close()
	outff.close()
	print "-- sorted : %d"%(nn);
	print "-- uniq+sorted : %d"%(nn1);
	print "-- uniq-sorted : %d"%(nn2);
	print "-- uniq_sorted : %d"%(nn1+nn2);
Example #2
 def extractSrcFileData(self, path):
     fileinput.close()
     isLocListener = False
     # raw strings with '$' and '()' escaped so the smali signatures match literally
     wakeLockAcqRegex = r"invoke-virtual(.*?)Landroid/os/PowerManager\$WakeLock;->acquire\(\)"
     domRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/DocumentBuilderFactory;->newDocumentBuilder\(\)"
     saxRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/SAXParserFactory;->newSAXParser\(\)"
     xmlppRegex = r"invoke-static(.*?)Landroid/util/Xml;->newPullParser\(\)"
     for line in fileinput.input([path]):
         matches = re.findall(wakeLockAcqRegex, line)
         if len(matches) > 0:
             self.numNoTimeoutWakeLocks = self.numNoTimeoutWakeLocks + 1
         if line.startswith(".implements Landroid/location/LocationListener;"):
             self.numLocListeners = self.numLocListeners + 1
             isLocListener = True
         if isLocListener:
             if "\"gps\"" in line:
                 self.numGpsUses = self. numGpsUses + 1
         matches = re.findall(domRegex, line)
         if len(matches) > 0:
             self.numDomParser = self.numDomParser + 1
         matches = re.findall(saxRegex, line)
         if len(matches) > 0:
             self.numSaxParser = self.numSaxParser + 1
         matches = re.findall(xmlppRegex, line)
         if len(matches) > 0:
             self.numXMLPullParser = self.numXMLPullParser + 1
Example #3
    def parse(self, file_location):
        """Loads Kv data into memory
        Args:
            file_location (str): Path of file
        Returns:
            dict: name : numpy array of events
        """

        #This loop both sets the first_line variable, and finds the number of lines in the file
        for file_length, line in enumerate(fileinput.input(file_location)):
            if file_length == 0:
                first_line = line
        fileinput.close()


        parsed = {}

        for x in range(len(first_line.split(","))):
            parsed[first_line.split(",")[x].split("=")[0]] = numpy.zeros(shape=file_length+1, dtype="float64")

        for index, line in enumerate(fileinput.input(file_location)):
            the_line = line.strip("\n")

            for particle_count in range(len(line.split(","))):
                parsed[the_line.split(",")[particle_count].split("=")[0]][index] = numpy.float64(the_line.split(",")[particle_count].split("=")[1])
        fileinput.close()

        return parsed
Example #4
def processDir(dir_proc):
    for file in os.listdir(dir_proc):
        if os.path.isdir(os.path.join(dir_proc, file)):
            print "WARN:%s is a directory" %(file)
            processDir(os.path.join(dir_proc, file))
            continue

        if not file.endswith(".log"):
            print "WARN:%s is not a log file" %(file)
            continue

        print "INFO:process file %s" %(file)
        for line in fileinput.input(os.path.join(dir_proc, file)):
            matchs = nginxLogPattern.match(line)
            if matchs!=None:
                allGroups = matchs.groups()
                ip = allGroups[0]
                time = allGroups[1]
                request = allGroups[2]
                status =  allGroups[3]
                bodyBytesSent = allGroups[4]
                refer = allGroups[5]
#                userAgent = allGroups[6]
                userAgent = matchs.group("userAgent")
                print userAgent

                # count the number of each HTTP status code
                GetResponseStatusCount(userAgent)
                # add any other analysis code needed here
            else:
                raise Exception

        fileinput.close()
Example #5
    def configuring_nodejs_app(self, git_repo, random_string):

        try:
            file_name = git_repo + "/server.js"
            for line in fileinput.input(file_name, inplace = True):
                match = re.search(r"res.send\(self.cache_get\('index.html.*", line)
                if match:
                    print 'res.send("<html><head></head><body><p>%s</p></body></html>");' % ( random_string )
                else:
                    print line,
        except Exception as e:
            fileinput.close()
            print type(e)
            print e.args
            return 1
        finally:
            fileinput.close()

        deployment_steps = [
            "cd %s" % ( git_repo ),
            "git commit -a -m 'Added special handler for /'",
            "git push" ]

        ( ret_code, ret_output ) = common.command_getstatusoutput(" && ".join(deployment_steps))
        print ret_output
        return ret_code
Example #6
    def _read_multi_column_list(self, list_file):
        rows = []
        if not os.path.isfile(list_file):
            raise RuntimeError('File %s does not exist.' % (list_file,))
        try:
            for line in fileinput.input(list_file):
                if line.strip().startswith('#'):
                    continue
                parsed_line = re.findall('[\w/(-.)]+', line)
                if len(parsed_line):
                    # perform some sanity checks
                    if len(parsed_line) not in (2, 3, 4):
                        raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % (
                            line.rstrip(), list_file))
                    if len(rows) and len(rows[0]) != len(parsed_line):
                        raise IOError(
                            "The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!" % (
                                parsed_line, list_file, rows[0]))
                    # append the read line
                    rows.append(parsed_line)
            fileinput.close()
        except IOError as e:
            raise RuntimeError("Error reading the file '%s' : '%s'." % (list_file, e))

        # return the read list as a vector of columns
        return rows
Example #7
 def test_sort_big_file_numeric(self):
     join_fields = '0'
     sorter = mod.CSVSorter(self.dialect, join_fields, self.temp_dir, self.temp_dir)
     outfile = sorter.sort_file(self.fqfn)
     assert outfile == self.fqfn + '.sorted'
     for rec in fileinput.input(self.fqfn + '.sorted'):
         fields = rec.split(',')
         print(fields)
         if fileinput.lineno() == 1:
             assert fields[0] == '1'
         elif fileinput.lineno() == 2:
             assert fields[0] == '2'
         elif fileinput.lineno() == 3:
             assert fields[0] == '3'
         elif fileinput.lineno() == 4:
             assert fields[0] == '4'
         elif fileinput.lineno() == 5:
             assert fields[0] == '5'
         elif fileinput.lineno() == 6:
             assert fields[0] == '6'
         elif fileinput.lineno() == 7:
             assert fields[0] == '7'
         elif fileinput.lineno() == 8:
             assert fields[0] == '8'
         elif fileinput.lineno() == 9:
             assert fields[0] == '9'
         elif fileinput.lineno() == 10:
             assert fields[0] == '10'
         else:
             assert 0, 'too many rows returned'
     fileinput.close()
Example #8
def remove_line(line_search, filepath):
    for line in fileinput.input(filepath, inplace=True):
        if line_search in line.strip():
            continue
        else:
            print(line.rstrip("\n"))
    fileinput.close()
    def test_full_single_file(self):
        """ Tests use of columns all against a single file.
        """
        cmd = "%s %s -c '1,2' -d '|' -o %s " % (os.path.join(script_path, 'gristle_freaker'), self.easy_fqfn, self.out_fqfn)
        p =  subprocess.Popen(cmd,
                              stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              close_fds=True,
                              shell=True)
        records =  p.communicate()[0]
        assert p.returncode == 0
        #for record in records.split('\n'):
        #    print record
        out_rec_cnt = 0
        for rec in fileinput.input(self.out_fqfn):
            out_rec_cnt += 1
            fields     = rec[:-1].split('-')
            key_col_1  = fields[0].strip()
            key_col_2  = fields[1].strip()
            freq_cnt   = int(fields[2])

            assert key_col_1 == 'A'
            assert key_col_2 == 'B'
            assert freq_cnt  == 100

        fileinput.close()
        assert out_rec_cnt == 1
        p.stdin.close()
Example #10
File: dirstack.py Project: bricef/dotfiles
def get(index):
  for line in fileinput.input(STACK_FILE):
    if fileinput.lineno() == index:
      fileinput.close()
      return line
  fileinput.close()
  raise RuntimeError("The index selected does not exist.")
Example #11
def swap_lines(line_search_1, line_search_2, filepath):
    """
    Swap lines in file, if line_search1 before line_search2
    :param filepath:
    :param line_search_2:
    :param line_search_1:
    """
    count = 0
    count1 = 0
    count2 = 0
    for line in fileinput.input(filepath):
        count += 1
        if line_search_1.strip() in line.strip():
            count1 = count
        elif line_search_2.strip() in line.strip():
            count2 = count
    fileinput.close()

    if 0 < count1 < count2:
        for line in fileinput.input(filepath, inplace=True):
            if line_search_1.strip() in line.strip():
                print(line_search_2)
            elif line_search_2.strip() in line.strip():
                print(line_search_1)
            else:
                print(line.rstrip("\n"))
        fileinput.close()
Example #12
def upload(filename, dbname, session):
    blockcounter = 0
    rowcounter = 0
    requestdata = dict(new_edits=False,docs=[])
    for line in fileinput.FileInput(filename):
        try:
            line = line.rstrip()
            if line[-1] == ',':
                line = line[:-1]

            bloated_doc = json.loads(line)

            if blockcounter >= config['blocksize']:
                #update db
                updatedb(dbname, requestdata, session)
                #reset the temp dict and counter
                requestdata = dict(new_edits=False,docs=[])
                blockcounter = 0

            #add row to temp dict
            requestdata['docs'].append(bloated_doc['doc'])
            #increment the row counter
            blockcounter += 1        
        except:
            if rowcounter != 0 and line != ']}':
                print 'An exception occured on line {0}'.format(rowcounter)
        finally:
            rowcounter += 1
    fileinput.close()
        
    #write any remaining rows to the database
    updatedb(dbname, requestdata, session)

    print 'Database "{0}" uploading completed.'.format(dbname)
def load_dbpedia(data, database_1, database_2):
    for line in fileinput.input(data):
        e1, rel, e2, p = line.split()
        e1 = e1.split('<http://dbpedia.org/resource/')[1].replace(">", "")
        e2 = e2.split('<http://dbpedia.org/resource/')[1].replace(">", "")
        e1 = re.sub("_", " ", e1)
        e2 = re.sub("_", " ", e2)

        if "(" in e1 or "(" in e2:
            e1 = re.sub("\(.*\)", "", e1)
            e2 = re.sub("\(.*\)", "", e2)

            # store a tuple (entity1, entity2) in a dictionary
            database_1[(e1.strip(), e2.strip())].append(p)

            # store in a dictionary per relationship: dict['ent1'] = 'ent2'
            database_2[e1.strip()].append(e2.strip())

        else:
            e1 = e1.decode("utf8").strip()
            e2 = e2.decode("utf8").strip()
            # store a tuple (entity1, entity2) in a dictionary
            database_1[(e1, e2)].append(p)

            # store in a dictionary per relationship: dict['ent1'] = 'ent2'
            database_2[e1.strip()].append(e2.strip())

    fileinput.close()

    return database_1, database_2
Example #14
def realbacktest(ticker="AAPL", start="2014-01-01", end="2015-10-23", duration=20, commission=2, file="test3"):
    global initialPrice

    try:
        startDate = datetime.strptime(start, "%Y-%m-%d")
        endDate = datetime.strptime(end, "%Y-%m-%d")
    except:
        print("wrong date format! Expect: %Y-%m-%d")
        return

    if (endDate - startDate).days < duration:
        print("duration larger than Duration")
        return

    stock = yahoo_finance.Share(ticker)
    stockHistory = stock.get_historical(start, end)
    stockHistory = stockHistory[::-1]
    realGain = getGains(stockHistory, duration, commission)

    fd = open(file, "wb")
    fd.write(str(realGain) + " net return, moving average\n")
    fd.write(
        str(
            (float(stockHistory[-1]["Adj_Close"]) - float(stockHistory[0]["Adj_Close"]))
            / float(stockHistory[0]["Adj_Close"])
        )
        + " buy and hold return\n"
    )
    fd.close()
Example #15
 def replaceInplace(f,searchExp,replaceExp):
         import fileinput
         for line in fileinput.input(f, inplace=1):
             if searchExp in line:
                 line = line.replace(searchExp,replaceExp)
             sys.stdout.write(line)
         fileinput.close() # reported by jakob
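For comparison, here is a minimal sketch of the same in-place replacement written against Python 3.2+, where FileInput can be used as a context manager so the explicit fileinput.close() (and any cleanup-on-error handling) becomes unnecessary; the name replace_inplace is only an illustrative stand-in:

import fileinput
import sys

def replace_inplace(path, search_exp, replace_exp):
    # The with-block closes the FileInput object even if an exception is
    # raised, so no explicit fileinput.close() call is needed.
    with fileinput.input(path, inplace=True) as f:
        for line in f:
            sys.stdout.write(line.replace(search_exp, replace_exp))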
Example #16
 def extractSrcFileData(self, path):
     fileinput.close()
     for line in fileinput.input([path]):
         matches = re.findall("invoke-virtual (.*?), Landroid/(.*?);->dismiss\(", line)
         if len(matches) > 0:
             self. dismiss = self.dismiss + 1
         matches = re.findall("invoke-virtual (.*?), Landroid/(.*?);->show\(", line)
         if len(matches) > 0:
             self.show = self.show + 1
         matches = re.findall("invoke-virtual (.*?), (.*?);->setContentView\(", line)
         if len(matches) > 0:
             self.setContentView = self.setContentView + 1
         matches = re.findall("invoke-virtual (.*?), Landroid/(.*?);->createScaledBitmap\(", line)
         if len(matches) > 0:
             self. createScaledBitmap = self.createScaledBitmap + 1
         matches = re.findall("invoke-virtual (.*?), (.*?);->onKeyDown\(", line)
         if len(matches) > 0:
             self.onKeyDown = self.onKeyDown + 1
         matches = re.findall("invoke-virtual (.*?), Landroid/(.*?);->isPlaying\(", line)
         if len(matches) > 0:
             self.isPlaying = self.isPlaying + 1
         matches = re.findall("invoke-virtual (.*?), (.*?);->unregisterReceiver\(", line)
         if len(matches) > 0:
             self.unregisterReceiver = self.unregisterReceiver + 1
         matches = re.findall("invoke-virtual (.*?), (.*?);->onBackPressed\(", line)
         if len(matches) > 0:
             self. onBackPressed = self.onBackPressed + 1
         matches = re.findall("invoke-virtual (.*?), (.*?);->showDialog\(", line)
         if len(matches) > 0:
             self.showDialog = self.showDialog + 1
         matches = re.findall("invoke-virtual (.*?), Landroid/(.*?);->create\(", line)
         if len(matches) > 0:
             self.create = self.create + 1
def extract_weibo(_index_begin, _index_end):
    _file = open("weibo_place_weibo_{0}_{1}_extracted.txt".format(_index_begin, _index_end), "w")
    _index, _curr_poid, _curr_poid_index, _curr_poid_count, _time_begin, _time_end = 0, "", 0, 0, "", ""
    for line in fileinput.input("weibo_place_weibo_{0}_{1}.txt".format(_index_begin, _index_end)):
        # _index += 1
        # print _index
        _poid = line.strip().split('\t')[0]
        _content = ' '.join(line.strip().split('\t')[2:])
        try:
            _uid = re.findall(r'<a.*?class="card_content" alt="(.+?)">', _content)[0].strip()
            _text = re.sub(r'<.+?>','',re.findall(r'</a>:([\s\S]*?)</a>', _content)[0]).strip()
            _date = re.findall(r'<a.*?class="date">(.*?)</a>', line)[0].strip()
            _loc, _lng, _lat = re.findall(r'<a class="showmapbox" action-data="(.*?)\|(.*?),(.*?)\|0\|.*?">', _content)[0]
            _lng, _lat = round(float(_lng),3), round(float(_lat),3)
            _cord = "{0}|{1},{2}".format(_loc.strip(), _lng, _lat)
            _file.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(_poid,_date,_cord,_uid,_text))
            # validation
            if _poid != _curr_poid:
                if _curr_poid != "":
                    print _curr_poid_index, _curr_poid, _curr_poid_count, _time_begin, _time_end
                _curr_poid, _curr_poid_index, _curr_poid_count, _time_begin = _poid, _curr_poid_index+1, 0, _date
            else:
                _time_end, _curr_poid_count = _date, _curr_poid_count+1
        except:
            continue
    fileinput.close()
    _file.close()
Example #18
def load_img_list(img_list_file):
    global dc

    gc.disable()
    count = 0
    for line in fileinput.input(img_list_file):
        img1_file, img2_file, ofx_file, ofy_file = line.strip().split(' ')
        #print "[",count,"] ", img1_file, img2_file, ofx_file, ofy_file

        if img1_file not in dc:
            #id, key = get_img_key(img1_file, False)
            imgs.append(img1_file)
            #keys.append(key)
            dc[img1_file] = 1

        if img2_file not in dc:
            #id, key = get_img_key(img2_file, False)
            imgs.append(img2_file)
            #keys.append(key)
            dc[img2_file] = 1

        count += 1
        if max_images > 0 and  count > max_images:
            break

        #string_ = str(block_first + in_idx + 1) + ' / ' + str(len(img_list))
        if (count % 1000) == 0:
            sys.stdout.write("\r%d" % count)
            sys.stdout.flush()

    gc.enable()
    fileinput.close()
    print

    return
Example #19
File: gmXmlDocDesc.py Project: sk/gnumed
    def __load_obj_list(self):
        """Read list of image files from XML metadata file.

        We assume the order of file names to correspond to the sequence of pages.
        - don't use self.__get_from_xml, because we want to
          scan lines sequentially here
        """
        self.__data['objects'] = {}
        tag_name = self.__cfg.get(self.__group, "obj_tag")
        # now scan the xml file
        idx = 0
        for line in fileinput.input(self.__xml_file):
            content = self.__extract_xml_content(line, tag_name)
            if content is None:
                continue
            idx += 1
            tmp = {}
            tmp['file name'] = os.path.abspath(os.path.join(self.__base_dir, content))
            # this 'index' defines the order of objects in the document
            tmp['index'] = idx
            # we must use imaginary oid's since we are reading from a file,
            # this OID defines the object ID in the data store, this
            # has nothing to do with the semantic order of objects
            self.__data['objects'][idx] = tmp

        # cleanup
        fileinput.close()

        if idx == 0:
            _log.warning("no files found for import")
            return None

        _log.debug("document data files to be processed: %s" % self.__data['objects'])

        return 1        
Example #20
def plot_chord_window_transfer():
	window_map = {}
	for line in fileinput.input("data_processed/chord_window_transfer.txt"):
		window_1, window_2, v = line.strip().split("\t")[0].split(",")[0], line.strip().split("\t")[0].split(",")[1], line.strip().split("\t")[1]
		if not window_map.has_key(window_1):
			window_map[window_1] = {"canteen":"","total":0,"stay":0,"price":0}
		if window_2 == window_1:
			window_map[window_1]["stay"] = int(v)
		window_map[window_1]["total"] += int(v)
	fileinput.close()

	for line in fileinput.input("data_processed/tree_window.txt"):
		(canteen, window, v1, v2, v3) = line.strip().split("\t")
		window_map[window]["canteen"] = canteen
		window_map[window]["price"] = float(v3)
	fileinput.close()

	with open("data2js.txt","w") as f:
		for window, values in window_map.iteritems():
			if values["canteen"] == "一餐":
			# if values["canteen"] == "二餐":
			# if values["canteen"] == "三餐":
			# if values["canteen"] == "四餐":
			# if values["canteen"] == "五餐":
			# if values["canteen"] == "六餐":
			# if values["canteen"] == "哈乐":
				f.write("["+str(round(values["price"],2))+","+str(round(100.0*values["stay"]/values["total"],2))+"],\n")
Example #21
def calctrigramcount(inputfilename,trigramcountmap) :	
	global alphabets

	for inputline in fileinput.input(inputfilename) :
		inputline = inputline.strip('\r\n')
		inputline = inputline.strip(' ')
		if len(inputline) == 0 :
			continue
		start = 0
		currentletter = ''
		previousletter1 = ''
		previousletter2 = ''
		for character in inputline :
			if character == ' ' :
				continue
			currentletter = character
			if start == 0 :
				previousletter1 = 'S'
				previousletter2 = 'S'
				token = previousletter1 + previousletter2 + currentletter
				trigramcountmap[token] += 1.0
				previousletter1 = previousletter2
				previousletter2 = currentletter
				start = 1
				continue
			token = previousletter1 + previousletter2 + currentletter
			trigramcountmap[token] += 1.0
			previousletter1 = previousletter2
			previousletter2 = currentletter
		currentletter = 'E'
		token = previousletter1 + previousletter2 + currentletter
		trigramcountmap[token] += 1.0
	fileinput.close()
	return trigramcountmap
Example #22
 def parse_email(self):
     db = JsonDbJetty()
     rawfile = db.get_info('article', 'raw_file')
     bodyfile = db.get_info('article', 'body_file')
     shutil.copyfile(rawfile, bodyfile)
     cxt_flag = False
     regx = re.compile('^,,,,$')
     for l in fileinput.input(bodyfile, inplace=True):
         if cxt_flag == True:
             #print l
             sys.stdout.write(l)
         else:
             theline = l.strip()
             if regx.match(theline):
                 cxt_flag = True
                 continue
             if theline.startswith('class='):
                 self.blogclass=theline.replace('class=','')
                 continue
             if theline.startswith('title='):
                 self.blogtitle=theline.replace('title=','')
                 continue
     fileinput.close()
     if self.blogclass and self.blogtitle and cxt_flag==True:
         return 1
     else:
         return 0
Example #23
def consist(infile,PathToTaxonomy):
	TargetCategories={}
	outfile=open(infile+".con",'w')
	catdict={}
	for line in fileinput.input([PathToTaxonomy+"/nodes.dmp"]):
		DictValues=line.split('\t')
		catdict[DictValues[0]]=DictValues[4]
	fileinput.close()
	for line in fileinput.input([infile]):
		HitValues=line.strip().split('\t')
		TargetCategories[HitValues[0]]={'perc':HitValues[2],'species':[],'genus':[],'family':[],'order':[],'phylum':[],'class':[],'kingdom':[]}
	fileinput.close()
	for line in fileinput.input([infile]):
		HitValues=line.strip().split('\t')
		n=12
		while n<len(HitValues):
			for cat in TargetCategories[HitValues[0]].keys():
				if catdict[HitValues[n]]==cat:
					if HitValues[n] not in TargetCategories[HitValues[0]][cat]:
						TargetCategories[HitValues[0]][cat].append(HitValues[n])
			n=n+1
	print "Number of ids with taxonomic information:" + str(len(TargetCategories))
	for ID in TargetCategories.keys():
		LCA_per_ID={}
		for cat in TargetCategories[ID].keys():
			if len(TargetCategories[ID][cat])>1:
				LCA_per_ID[cat]=str(','.join(TargetCategories[ID][cat]))
			elif len(TargetCategories[ID][cat])==0:
				LCA_per_ID[cat]='n'+str(len(TargetCategories[ID][cat]))
			else:
				LCA_per_ID[cat]=TargetCategories[ID][cat][0]
		outfile.write(ID+'\t'+TargetCategories[ID]['perc']+'\t'+LCA_per_ID['species']+'\t'+LCA_per_ID['genus']+'\t'+LCA_per_ID['family']+'\t'+LCA_per_ID['order']+'\t'+LCA_per_ID['class']+'\t'+LCA_per_ID['phylum']+'\t'+LCA_per_ID['kingdom']+'\n')
	outfile.close()
Example #24
    def readfilelist(self):
        file_list = self.file_list
        directory = self.directory
        table1 = self.table1
        db1 = self.db1
        cursor1 = self.cursor1
        ast_lit = ast.literal_eval

        open_file_object_list = [os.path.join(directory, filename) for filename in file_list]

        tuple_sku = ["'%s'" % (ast_lit(line)[0]) for line in fileinput.input(open_file_object_list)]
        fileinput.close()
  
        sql = """update %s set upload_image_status = "YES" where product_id in (%s)""" %(table1, ", ".join(tuple_sku))

        #print sql
        #try:
        cursor1.execute(sql)
        db1.commit()
        print "updated........................."
        #except:
        #    db1.rollback()

        del open_file_object_list[:]
        del open_file_object_list
        del tuple_sku[:]
        del tuple_sku
Example #25
    def _get_format_type(self) -> str:
        """ Determines format type based on whether or not all records
            are of the same length.

            Returns either 'csv' or 'fixed'
        """
        # our solution isn't accurate enough to show yet, so for now just
        # set to 'csv':
        return 'csv'

        #todo:  make this smarter:
        #       - Since we're not using a csv dialect we could have control 
        #         characters breaking a row into multiple lines.
        #       - Also, a small csv file might really have all rows get the same
        #         length.
        #       - Also, the user may have passed in explicit csv dialect info.
        rec_length = collections.defaultdict(int)
        for rec in fileinput.input(self.fqfn):
            rec_length[len(rec)] += 1
            if fileinput.lineno() > 1000:     # don't want to read millions of recs
                break
        fileinput.close()

        if len(rec_length) == 1:
            return 'fixed'
        else:
            return 'csv'
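The record-length heuristic described in the comments above could be sketched as a standalone helper roughly as follows; guess_format_type is a hypothetical name and not part of the original class:

import collections
import fileinput

def guess_format_type(path, sample_size=1000):
    """Return 'fixed' if all sampled records share one length, else 'csv'."""
    rec_length = collections.defaultdict(int)
    for rec in fileinput.input(path):
        rec_length[len(rec)] += 1
        if fileinput.lineno() > sample_size:   # don't read millions of records
            break
    fileinput.close()
    return 'fixed' if len(rec_length) == 1 else 'csv'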
Example #26
 def parseFromPkg(self,pkgfile):
     ''' parse all bus record from pkg file '''
     beginTime = datetime.now()
     self.pkgfile = pkgfile
     self.pkgdir = os.path.dirname(self.pkgfile)
     if not os.path.isfile(self.pkgfile):
         self.faildExit("pkg file [%s] does not exist" % pkgfile)
     self._checkPkgFile()
     
     lineno = 1
     for line in fileinput.input(pkgfile):
         if lineno == 1:
             if not self._readPkgHead(line): # parse the package list file header
                 self.faildExit("pkg head format error")
         elif lineno - 1 <= self.recordCount :
             if len(line) == 0: continue
             if not self._checkRecordFile(line): # check that the record file exists
                 self.faildExit("record file [%s] does not exist or has a wrong size" % line)
         else: break
         lineno = lineno + 1
         
     if len(self.recordFiles) != self.recordCount:
         self.faildExit("pkg file list not match real files")
     
     fileinput.close()
     self._makeStructDef()
         
         
     r,m = self._genSQL() # generate the SQL file
     if r != 0:
         self.faildExit(m)
     self._finishRemoveFile(pkgfile)
     delta = datetime.now() - beginTime
     total = delta.seconds + delta.microseconds / 1000000.0
     print "Success : [%s] total[%d] time[%.2f]second" % (self.SQLFile,self.totalRecord,total)
Example #27
 def _checkMd5(self,tarfile):
     ''' verify the MD5 checksum '''
     md5file = self._getMD5FileName(tarfile)
     if not os.path.exists(md5file):
         print "md5 file does not exist [%s]" % md5file
         return False
     
     m = None
     if sys.hexversion >= 0x02050000:
         import hashlib
         m = hashlib.md5()
     else:
         import md5
         m = md5.md5()
     tarhandle = open(tarfile,'rb')
     while True:
         data = tarhandle.read(1024)
         if len(data) > 0:
             m.update(data)
         if len(data)<1024:
             break
     tarhandle.close()
     digest = m.hexdigest().upper()
     md5fileDigest = ''
     for line in fileinput.input(md5file):
         md5fileDigest = line.upper().strip()
         break
     fileinput.close()
     if digest == md5fileDigest[:32]:
         return True
     print "digest[%s][%s]" % (digest ,md5fileDigest)
     return False
Example #28
    def cleanup(self):
        """
        Called only if PackStack Setup is successful. Returns modified system environment to default.
        It performs the following activities:
        1. Enable Repositories, during installation default repos are disabled {for offline install}
        (NOTE: EPEL repo is not changed back to default, it still points to /root/cloud/)
        2. Remove rabbitmq entry from /etc/hosts file
        3. Disable initial-setup-text service

        """
        count = 3
        for line in fileinput.input(ROOT_PATH + "/etc/yum.repos.d/CentOS-Base.repo", inplace=True):
            if line.startswith("enabled"):
                if count > 0:
                    count -= 1
                    print(line.replace("enabled=0", "enabled=1").rstrip("\n"))
                else:
                    print line,
            else:
                print line,
        fileinput.close()

        for line in fileinput.input(ROOT_PATH + '/etc/hosts', inplace=True):
                print(line.replace("127.0.0.1 www.rabbitmq.com", "").rstrip("\n"))
        fileinput.close()

        ret = iutil._run_systemctl("disable", "initial-setup-text")
        if ret:
            print ("Failed to Disable INITIAL-SETUP-UTILITY\n")
            return False
        os.remove(os.path.normcase(ROOT_PATH + "/var/www/html/rabbitmq-signing-key-public.asc"))
        return True
Example #29
File: frag_array.py Project: bsmithers/hpf
def gather():
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)
    
    
    #aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #aazj00109_05.075_v1_3.bz2  zj001.psipred
    aa_pattern = "aa"+name+"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in [".fasta", ".psipred", ".psipred_ss2"]:
        patterns.append(name+suf)
    
    try:
        for pattern in patterns:
            print "Pattern ",pattern
            match = paths.find(pattern, scr_job)
            if not match:
                raise Exception("Missing file")
            for path in match:
                if pattern == aa_pattern:
                    print "Filtering Columns %s" % path
                    for line in fileinput.input(path, inplace=1):
                        print " "+line[:47].strip()
                    fileinput.close()
                print "Path ", path
                file = paths.getFile(path)
                dest = paths.join(job_results, file)
                util.copy(path, job_results)
                if pattern == aa_pattern:
                    util.system("bzip2 %s"  % dest)
    except:
        paths.removerf(job_results)
        raise
Example #30
File: sort.py Project: BBOOXX/stash
def main(args):
    ap = argparse.ArgumentParser()
    ap.add_argument('files', nargs='*', help='files to sort')
    ap.add_argument('-r', '--reverse', action='store_true', default=False,
                    help='reverse the result of comparisons')
    ns = ap.parse_args(args)

    def _print(lines):
        if lines is not None:
            lines = sorted(lines)
            if ns.reverse:
                lines = lines[::-1]
            print(''.join(lines))

    fileinput.close()  # in case it is not closed
    try:
        lines = None
        for line in fileinput.input(ns.files):
            if fileinput.isfirstline():
                _print(lines)
                lines = []
            lines.append(line)

        _print(lines)

    finally:
        fileinput.close()
Example #31
    def method_obfuscation(self):
        """
    	Method name obfuscation in invoke-families and fields
    	   ->function() ==> ->encrypt()
    	   Lclass; => Lencrypt;
    	   field: => encrypt_field:

        [invoke pattern handling]
            1. check -> 
            2. change method name()
              1) if method name belongs to classname_list : change
              2) if method name belongs to API : not change
            3. change referred field name()
              1) skip R.class now
                - affect layout, ids, public, class's local "definition"
              2) change others
                - iget-boolean v0, p0, Lcn/smstelphoneapp/service/STAService;->g:Z
                 => ->enc:Z
            4. change class name()
            * MEMO- A:class (field A from class)
        [method pattern handling]
        [field definition handling]
        [etc pattern handling]
        """

        print "[*] Method name obfuscation"

        for index in range(len(self.smali_path)):
            full_path = ret_fullpath(self.target, self.smali_path[index],\
                self.smali_filename[index])
            only_filename = self.smali_filename[index]
            #logging.info(only_filename)

            for line in fileinput.input(full_path, inplace=1):
                #for line in open(full_path,'r').readlines():
                changed = False

                "invoke_pattern change ->"
                # 1. check ->
                if ";->" in line:

                    left_class_name = ret_class_from_method_call(line,
                                                                 first=True)

                    # 2.1 - if class exist in scanned class list:
                    if self.check_classname_exist(left_class_name) == True:

                        # 2.2 - if method name belong to original class => mod
                        #logging.info(extract_method_name(line))
                        #logging.info(line.strip())
                        if self.is_method_in_class(extract_method_name(line), \
                            left_class_name):
                            line = mod_method_call_name(line).rstrip() + "\n"

                    # 3 - change referred field name()
                    if ':' in line:
                        left_class_name = ret_class_from_method_call(
                            line, first=True)
                        #print "debug:"+left_class_name
                        if self.check_classname_exist(left_class_name) == True:

                            line = mod_field_reference(line)

                    # 4 - change classname()
                    line = self.mod_line_class(line)

                    # MEMO for test
                    sys.stdout.write(line)
                    continue

                elif is_method_pattern(line):
                    ".method pattern change, except for blacklist_functions"
                    #logging.info(line.strip())
                    line = mod_method_define_name(line) + "\n"
                    line = self.mod_line_class(line)
                    #logging.info(line)

                    # MEMO for test
                    sys.stdout.write(line)
                    continue

                # .field definition pattern?
                elif '.field ' in line:
                    "find field definition section"

                    # TODO : have to handle corner case with double LL
                    # e.g., .field public static final IAB_LEADERBOARD:Lcom/google/ads/AdSize;
                    # L1234:Lcom/a/a/c; => what is between L~; ?

                    line = mod_field_define_name(line)
                    if ':' in line:
                        first = line.split(':')[0]
                        second = line.split(':')[1:]
                        second = ''.join(second)

                        line = first + ":" + self.mod_line_class(second)

                # MEMO for test
                    sys.stdout.write(line)
                    continue

                elif 'const-string ' in line:
                    reg, string = ret_string_reg(line)

                    if is_fieldname_in_blacklist(string):
                        sys.stdout.write(line)
                        continue

                    if string in self.field_list:
                        if '/' in line:
                            line = line.replace("\"" + string + "\"",
                                                "\"" + rot13(string) + "\"")
                        else:
                            line = line.replace(
                                "\"" + string + "\"",
                                "\"" + replace_upper_L(rot13(string)) + "\"")
                    sys.stdout.write(line)
                    continue

                    #elif is_class_reference(line):
                    """
                    Just using class instance => should handle
                       ex) const-class v1, Lcn/smstelphoneapp/service/STAService
                       this handles: .class definition and const-class...
                    """
                    #line = self.mod_line_class(line)

                    #print "debug:class_reference"
                    #if 'obrfcwr/giddcfh/j7/odd/OddQcndohOqhwjwhm' in line:
                    #    logging.info('HERE')

                line = self.mod_line_class(line)
                sys.stdout.write(line)
            fileinput.close()
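To illustrate the renaming idea from the docstring, here is a small hypothetical sketch assuming a ROT13-style rename like the const-string branch above uses; rot13_method_call is an illustrative stand-in, not the project's actual mod_method_call_name helper:

import codecs
import re

def rot13_method_call(line):
    # Rewrite the method name in a smali invoke line, e.g.
    #   invoke-virtual {v0}, Lcom/foo/Bar;->doWork()V
    # becomes ...;->qbJbex()V (ROT13 of "doWork").
    return re.sub(r'(;->)(\w+)(\()',
                  lambda m: m.group(1) + codecs.encode(m.group(2), 'rot13') + m.group(3),
                  line)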
Example #32
    # X, labels = X[0:100], labels[0:100]
    # Y = tsne(X, 2, 50, 20.0);
    # Plot.scatter(Y[:,0], Y[:,1], 20, labels);
    # plt.scatter(Y[:,0], Y[:,1], 20, labels)
    # plt.show()

    vectormap = {}
    for line in fileinput.input("../../data/word2vec/vectors.weibo.txt"):
        try:
            word, vector = line.strip().split("\t")[0], [
                float(i) for i in line.strip().split("\t")[1].split(" ")
            ]
            vectormap[word] = vector
        except:
            continue
    fileinput.close()
    W, X, y = [], [], []
    for line in fileinput.input("data/pos_eva.sort.txt"):
        word = line.strip()
        if vectormap.has_key(word) and len(word.decode("utf-8")) >= 2:
            W.append(word)
            X.append(vectormap[word])
            y.append("1")
            if len(W) == 200:
                break
    fileinput.close()
    for line in fileinput.input("data/pos_emo.sort.txt"):
        word = line.strip()
        if vectormap.has_key(word) and len(word.decode("utf-8")) >= 2:
            W.append(word)
            X.append(vectormap[word])
Example #33
def parsefile(filename_ppd='../data/static_info_ppd.csv',
              filename_zmq='../data/static_info_zmq.txt',
              write_overlap=False):
    def parse(f_tags, f_score, f_averageProfit, f_registMoney, f_autobid,
              f_stockTransfer, f_fundsToken, f_guaranteeMode, f_guaranteeOrg,
              f_lauchTime, f_category, f_lng, f_lat):
        def extr_tags(x):
            types = ['国资系', '上市公司系', '银行系', '民营系']  # company categories
            others = ['投之家合作平台', '股权上市', '接受过风投', '争议', '加入第三方征信', '加入协会']
            r = [0] * 7
            tags = x.split(',')
            for i in xrange(len(tags)):
                if tags[i] in types: r[0] = types.index(tags[i]) + 1
                if tags[i] in others: r[others.index(tags[i]) + 1] = 1
            return r

        def extr_ones(x, cut):
            x = x.strip(cut)
            return 0 if x == '' else float(x)

        def extr_autobid(x):
            return 0 if x == '' else 1 if x == '支持' else -1

        def extr_stockTransfer(x):
            return -1 if x == '' else 0 if x == '随时' else 12 if x == '1年' else 300 if x == '不可转让' else x.strip(
                '个月')

        def extr_fundsToken(x):
            return 0 if x == '' or x == '无托管' else 1

        def extr_ifGuarantee(x):
            return 0 if x == '' else 1

        def extr_lauchTime(x):
            import datetime

            def date_difference(d1, d2):
                if '-' in d1 and '-' in d2:
                    d1 = datetime.datetime.strptime(d1 + ' 00:00:00',
                                                    '%Y-%m-%d %H:%M:%S')
                else:
                    d1 = datetime.datetime.strptime(d1 + ' 00:00:00',
                                                    '%Y年%m月%d日 %H:%M:%S')
                d2 = datetime.datetime.strptime(d2 + ' 00:00:00',
                                                '%Y-%m-%d %H:%M:%S')
                return (d2 - d1).days / 30

            return 6 if x == '' else date_difference(x, '2016-05-01')

        def extr_category(x):
            _dict = {'股份合作企业':0, '私营企业':1, '港、澳、台投资企业':2, '股份制企业':3, \
               '集体所有制企业':4, '外商投资企业':5, '国有企业':6, '联营企业':7}
            return _dict.get(x, -1)

        return extr_tags(f_tags)+\
            [extr_ones(f_score,''),\
          extr_ones(f_averageProfit,'%'),\
          extr_ones(f_registMoney,' 万元'),\
          extr_autobid(f_autobid),\
          extr_stockTransfer(f_stockTransfer),\
          extr_fundsToken(f_fundsToken),\
          extr_ifGuarantee(f_guaranteeMode),\
          extr_ifGuarantee(f_guaranteeOrg),\
          extr_lauchTime(f_lauchTime),\
          extr_category(f_category),\
          extr_ones(f_lng,''),\
          extr_ones(f_lat,'')]

    ppd_platforms = {}
    zmq_platforms = {}

    # build the ppd static features
    with open(filename_ppd, 'rb') as csvfile_ppd:
        reader = csv.DictReader(csvfile_ppd)
        for row in reader:
            parsed = parse(row['tags'],row['score'],row['averageProfit'],row['registMoney'],row['autobid'],row['stockTransfer'],row['fundsToken'],\
                  row['guaranteeMode'],row['guaranteeOrg'],row['lauchTime'],row['category'],row['lng'],row['lat'])
            ppd_platforms[row['platName']] = parsed

    # build the zmq static features
    for line in fileinput.input(filename_zmq):
        if fileinput.lineno() == 1:
            field_names = line.strip().split('\t')
        else:
            row = {
                name: field
                for name, field in zip(field_names,
                                       line.strip().split('\t'))
            }
            parsed = parse('','',row.get('平均收益',''),row.get('注册资本',''),row.get('自动投标',''),row.get('债权转让',''),row.get('资金托管',''),\
                  row.get('保障模式',''),'',row.get('上线时间',''),row.get('公司类型',''),'','')
            zmq_platforms[row['平台名称']] = parsed
        if fileinput.lineno() == ZMQ_Nline:
            break
    fileinput.close()

    if write_overlap:
        print '重合平台:', len(
            set(ppd_platforms.keys()) & set(zmq_platforms.keys()))
        with open('overlap_platforms.txt', 'w') as outfile:
            outfile.write('\n'.join(
                list(set(ppd_platforms.keys()) & set(zmq_platforms.keys()))))

    return ppd_platforms, zmq_platforms
def statistic(filename,
              bycol='INFOSOURCENAME',
              outdir='statistic_infosource',
              time_granularity='%m',
              Nmonth=20):
    # TBD: use collections.defaultdict()
    case_dict = {}

    for line in fileinput.input(filename):
        if fileinput.lineno() % 10**4 == 0:
            sys.stdout.write(str(fileinput.lineno()) + '\r')
            sys.stdout.flush()
        TASKID, COORDX, COORDY, INFOSOURCENAME, DISCOVERTIME, SOLVINGTIME, \
        ADDRESS, STREETNAME, DESCRIPTION, EXECUTEDEPTNAME, URGENTDEGREE, USEREVALUATE, \
        INFOBCNAME, INFOSCNAME, INFOZCNAME, CASEENDBCNAME, CASEENDSCNAME = map(lambda x:x.strip(), line.decode('utf-8').split(u'\t'))
        INFOBCNAME, INFOSCNAME, INFOZCNAME = map(
            lambda x: re.sub(ur'\(浦东\)', '', x),
            [INFOBCNAME, INFOSCNAME, INFOZCNAME])
        if COORDX and COORDY and float(COORDX) and float(
                COORDY) and DISCOVERTIME:
            timeslot = time.strptime(
                DISCOVERTIME, '%Y/%m/%d %H:%M:%S').tm_mon + (
                    12 if DISCOVERTIME.startswith('2016') else 0)
            if bycol == 'STREETNAME':
                COL = STREETNAME
            elif bycol == 'EXECUTEDEPTNAME':
                COL = EXECUTEDEPTNAME
            elif bycol == 'INFOSOURCENAME':
                COL = INFOSOURCENAME
            else:
                raise Exception('Column is not supported.')
            case_dict[COL] = case_dict.get(COL, {})
            case_dict[COL][INFOBCNAME] = case_dict[COL].get(
                INFOBCNAME, [0] * 24)
            case_dict[COL][INFOBCNAME][timeslot - 1] += 1
    fileinput.close()

    compare = lambda array, delta: [
        u'{0:+.2f}'.format(1. *
                           (array[i] - array[i - delta]) / array[i - delta])
        if i - delta >= 0 and array[i - delta] else u'-'
        for i in xrange(len(array))
    ]
    for i, BCNAME in enumerate(category1 + category2):
        with open('{0}/{1}{2}.txt'.format(outdir, i, BCNAME.encode('utf-8')),
                  'w') as outfile:
            outfile.write(u'{0}\t{1}\n'.format(
                bycol.decode('utf-8'), u'\t'.join([
                    u'{0}年{1}月({2})'.format(year, month, title)
                    for year in (2015, 2016) for month in xrange(1, 13)
                    for title in (u'次数', u'对比上月', u'对比上年同期')
                ][:Nmonth * 3])).encode('utf-8'))
            for COL, cases in case_dict.iteritems():
                if BCNAME in cases and BCNAME:
                    outfile.write(u'{0}\t{1}\n'.format(
                        COL, u'\t'.join([
                            u'{0}\t{1}\t{2}'.format(c1, c2, c3)
                            for c1, c2, c3 in zip(cases[BCNAME],
                                                  compare(cases[BCNAME], 1),
                                                  compare(cases[BCNAME], 12))
                        ][:Nmonth])).encode('utf-8'))

    with open('{0}/_event.txt'.format(outdir, bycol), 'w') as outfile:
        outfile.write(bycol + '\t' + '\t'.join(
            BCNAME.encode('utf-8')
            for _, BCNAME in enumerate(category1 + category2)) + '\n')
        for COL, cases in case_dict.iteritems():
            outfile.write(
                COL.encode('utf-8') + '\t' + '\t'.join([
                    str(sum(case_dict[COL].get(BCNAME, [0])))
                    for BCNAME in category1 + category2
                ]) + '\n')

    with open('{0}/_sumup.txt'.format(outdir, bycol), 'w') as outfile:
        outfile.write(bycol + '\t' + '事件总数' + '\n')
        for COL, cases in case_dict.iteritems():
            outfile.write(
                COL.encode('utf-8') + '\t' + str(
                    sum([
                        sum(case_dict[COL].get(BCNAME, [0]))
                        for BCNAME in category1 + category2
                    ])) + '\n')
Example #35
def parse(filename, skipPhoto=True):
	"""
	A function to parse the cosima output simulation file.  The function returns a simulation object.
	Example Usage: 
	simulation = EventViewer.parse(filename)
	 """

	# Start an event counter
	currentEventNumber = 0

	# Loop through each line of the file
	for line in fileinput.input([filename]):

		# Create the first event
		if 'SE' in line and currentEventNumber == 0:

			# Create a new simulation object to store all of the events in this run
			simulation = Simulation()

			# Create a new event
			event = Event()

			# Create a new object to store the interactions for this event
			interactions = Interactions()

			# Create a new object to store the hits for this event
			hits = Hits()

			# Increment the event number
			currentEventNumber = currentEventNumber + 1

		# Store the existing event and create a new event
		elif 'SE' in line or 'EN' in line:
			
			# Store the interaction and hit objects in their respective event
			event.interactions = interactions
			event.hits = hits

			# Store the current event in the simulation object			
			simulation.events.append(event)

			# Create a new event
			event = Event()

			# Create a new object to store the interactions for the new event
			interactions = Interactions()

			# Create a new object to store the hits for the new event
			hits = Hits()

			# Increment the event number
			currentEventNumber = currentEventNumber + 1

		# Get the event ID
		if 'ID' in line and currentEventNumber != 0:

			event.id_trigger = line.split()[1]
			event.id_simulatedEvent = line.split()[2]

		# Get the event time
		if 'TI' in line and currentEventNumber != 0:

			event.time = line.split()[1]

		# Get the total deposited energy 
		if 'ED' in line and currentEventNumber != 0:

			event.depositedEnergy = line.split()[1]

		# Get the total escaped energy 
		if 'EC' in line and currentEventNumber != 0:

			event.escapedEnergy = line.split()[1]

		# Get the total deposited energy in non-sensative material
		if 'NS' in line and currentEventNumber != 0:

			event.depositedEnergy_NonSensitiveMaterial = line.split()[1]		

		# if 'IA' in line and 'PHOT' not in line:
		if 'IA' in line:

			# Skip photoelectric interactions
			if skipPhoto == True:
				if 'PHOT' in line:
					continue 

			# Split the line
			LineContents = line.split(';')	

			# Parse each line and place the extracted information into their respective arrays
			interactions.interactionType.append(LineContents[0].split()[1].split()[0])
			interactions.ID_interaction.append(LineContents[0].split()[2].split()[0])
			interactions.ID_parentInteraction.append(LineContents[1].split()[0])
			interactions.ID_detector.append(LineContents[2])
			interactions.timeStart.append(float(LineContents[3]))
			interactions.x.append(float(LineContents[4]))
			interactions.y.append(float(LineContents[5]))
			interactions.z.append(float(LineContents[6]))
			interactions.ID_parentParticleType.append(LineContents[7].split()[0])
			interactions.x_newDirection_OriginalParticle.append(float(LineContents[8]))
			interactions.y_newDirection_OriginalParticle.append(float(LineContents[9]))
			interactions.z_newDirection_OriginalParticle.append(float(LineContents[10]))
			interactions.x_polarization_OriginalParticle.append(LineContents[11])
			interactions.y_polarization_OriginalParticle.append(LineContents[12])
			interactions.z_polarization_OriginalParticle.append(LineContents[13])
			interactions.newKineticEnergy_OriginalParticle.append(LineContents[14])
			interactions.ID_childParticleType.append(LineContents[15])
			interactions.x_direction_NewParticle.append(float(LineContents[16]))
			interactions.y_direction_NewParticle.append(float(LineContents[17]))
			interactions.z_direction_NewParticle.append(float(LineContents[18]))
			interactions.x_polarization_NewParticle.append(LineContents[19])
			interactions.y_polarization_NewParticle.append(LineContents[20])
			interactions.z_polarization_NewParticle.append(LineContents[21])
			interactions.newKineticEnergy_NewParticle.append(LineContents[22].rstrip())

			if 'INIT' in line:
				event.initialEnergy = interactions.newKineticEnergy_NewParticle[-1]

			# Create a unique particle id to track parent and child particles
			ID_parentParticle = interactions.ID_parentInteraction[-1] + '_' + interactions.ID_parentParticleType[-1]
			ID_childParticle = interactions.ID_interaction[-1] + '_' + interactions.ID_childParticleType[-1]

			# if ID_childParticleType == '1':
			# 	ID_childParticle = ID_parentInteraction + '_' + ID_childParticleType
			# else:
			# 	ID_childParticle = ID_interaction + '_' + ID_childParticleType

			# Store the information for the individual particles associated with this interaction

			# Record the particle trajectory
			if ID_parentParticle in event.particleInformation:
				event.particleInformation[ID_parentParticle]['x'].append(interactions.x[-1])
				event.particleInformation[ID_parentParticle]['y'].append(interactions.y[-1])
				event.particleInformation[ID_parentParticle]['z'].append(interactions.z[-1])
				event.particleInformation[ID_parentParticle]['time'].append(interactions.timeStart[-1])
			else:
				event.particleInformation[ID_parentParticle] = {}
				event.particleInformation[ID_parentParticle]['x'] = [interactions.x[-1]]
				event.particleInformation[ID_parentParticle]['y'] = [interactions.y[-1]]
				event.particleInformation[ID_parentParticle]['z'] = [interactions.z[-1]]
				event.particleInformation[ID_parentParticle]['time'] = [interactions.timeStart[-1]]

			if ID_childParticle in event.particleInformation:
				event.particleInformation[ID_childParticle]['x'].append(interactions.x[-1])
				event.particleInformation[ID_childParticle]['y'].append(interactions.y[-1])
				event.particleInformation[ID_childParticle]['z'].append(interactions.z[-1])
				event.particleInformation[ID_childParticle]['time'].append(interactions.timeStart[-1])
			else:
				event.particleInformation[ID_childParticle] = {}
				event.particleInformation[ID_childParticle]['x'] = [interactions.x[-1]]
				event.particleInformation[ID_childParticle]['y'] = [interactions.y[-1]]
				event.particleInformation[ID_childParticle]['z'] = [interactions.z[-1]]
				event.particleInformation[ID_childParticle]['time'] = [interactions.timeStart[-1]]
	
		# Record the hit information
		if 'HTsim' in line:

			# Split the line
			LineContents = line.split(';')	
                        
			# Extract the hit information
			hits.detector.append(int(LineContents[0].split(' ')[1]))
			hits.x.append(float(LineContents[1]))
			hits.y.append(float(LineContents[2]))
			hits.z.append(float(LineContents[3]))
			hits.energy.append(float(LineContents[4]))


	# Close the input file
	fileinput.close()

	return simulation
Example #36
 def test_state_is_None(self):
     """Tests that fileinput.close() does nothing if fileinput._state
        is None"""
     fileinput._state = None
     fileinput.close()
     self.assertIsNone(fileinput._state)
def replaceKey(fileName, key, value):
    for line in fileinput.FileInput(fileName, inplace=1):
        if key in line:
            line = line.replace(key, value)
        sys.stdout.write(line)
    fileinput.close()
Example #38
def _sudoFichGrid_(nomFich, mode, pr=False):
    '''Read the character file and perform the checks.
    If mode = 1 (default): return a list of lines
    If mode = 2: return a list of lists
    '''
    if nomFich == None:
        raise Sudoku_Error \
              ("Pas de nom de fichier - Abandon")
    if mode not in (1,2):
        raise Sudoku_Error ("Mode de lecture de fichier invalide")
    
    if pr:
        display("Lecture du fichier ", nomFich, " :")
    listLines = list()
    lineno = 0
    try:
        for line in fileinput.input(nomFich):
            # if the line starts with #, ignore it
            if line[0] == '#':
                continue
            # error if the file contains more than 9 valid lines
            if lineno > 9:
                raise Sudoku_Error \
                      ("Le fichier contient trop de données. Lecture "\
                       "interrompue. La grille a été remplie.")
            # check the characters and normalize the empty cells
            line2 = ""
            for c in line:
                # ignore spaces
                if c == ' ':
                    continue
                # handle the characters that stand for a missing digit
                if c in ('0', '.', '-', '_'):
                    line2 = line2 + '0'
                # accept only '1' through '9'
                elif str(0) <= str(c) <= str(9):
                    line2 = line2 + str(c)
                # end of line: ignore the trailing '\n'
                elif c == '\n':
                    break
                else:
                    raise Sudoku_Error \
                          ("caractère invalides dans la ligne : " + line)
            # if the line is completely blank, skip to the next one
            # without counting it
            if len(line2) == 0:
                continue
            # error if the line does not contain exactly 9 digits
            if len(line2) != 9:
                raise Sudoku_Error \
                      ("la ligne" + str(lineno) + " : " + line2 + \
                       " ne contient pas exactement 9 chiffres. ")
            # ok, the line is valid - add it to the list
            if mode == 1:                   # list-of-lines mode
                listLines.append(line2)
            elif mode == 2:                 # list-of-lists mode
                listeval = list()
                for c in line2:
                    listeval.append(int(c))
                listLines.append(listeval)
            else:
                raise Sudoku_Error ("Mode de lecture de fichier invalide")

            lineno = lineno + 1
            if pr:
                display("ligne" + str(lineno) + " : " + line2)
        #end for
    except FileNotFoundError:
        raise Sudoku_Error ("Fichier invalide ou n'existe pas")
        return None
    finally:
        fileinput.close()
        
    # error if the file contained fewer than 9 valid lines
    if lineno < 9:
        raise Sudoku_Error \
              ("Erreur de lecture, le fichier contient moins de 9 lignes "\
               "de chiffres.")

    # ok, return the list of 9 lines of 9 digits
    if pr:
        display("Ok, 9 lignes de 9 chiffres.")
    return listLines
Example #39
def main(args):
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument(
        "-c",
        "--bytes",
        default="",
        type=str,
        metavar='K',
        help="""output the last K bytes; or -c +K starting with the Kth""")
    p.add_argument("-f",
                   "--follow",
                   action="store_true",
                   help="""follow specified files""")
    p.add_argument("-n",
                   "--lines",
                   default="10",
                   type=str,
                   metavar='K',
                   help="""print the last K lines instead of 10;
                   or use -n +K to print lines starting with the Kth""")
    p.add_argument("-q",
                   "--quiet",
                   "--silent",
                   action='store_true',
                   help="never print headers for each file")
    p.add_argument("-v",
                   "--verbose",
                   action='store_true',
                   help="always print headers for each file")
    p.add_argument(
        "-s",
        "--sleep-interval",
        type=float,
        default=1.0,
        help=
        "with -f, sleep for approximately N seconds (default 1.0) between iterations."
    )
    p.add_argument("files", action="store", nargs="*", help="files to print")
    ns = p.parse_args(args)

    status = 0

    if len(ns.files) == 0:
        ns.files = ['-']

    if ns.follow and '-' in ns.files:
        print('tail: warning: following stdin indefinitely is ineffective')

    if ns.bytes:
        use_bytes = True
        if ns.bytes[0] == '+':
            from_start = True
        else:
            from_start = False
        count = abs(int(ns.bytes))  # '-c -3' is equivalent to '-c 3'
    else:
        use_bytes = False
        if ns.lines[0] == '+':
            from_start = True
        else:
            from_start = False
        count = abs(int(ns.lines))  # '-n -3' is equivalent to '-n 3'

    try:
        for i, fname in enumerate(ns.files):
            if ns.verbose or (len(ns.files) > 1 and not ns.quiet):
                write_header(fname if fname != '-' else 'standard input')

            try:
                if fname == '-':
                    f = sys.stdin
                else:
                    f = open(fname)

                buf = []
                j = -1
                while True:
                    j += 1
                    if use_bytes:
                        l = f.read(1)
                    else:
                        l = f.readline()
                    if not l:
                        break

                    buf.append(l)
                    if from_start:
                        if j >= count - 1: break
                    elif len(buf) > count:
                        del buf[0]

                for item in buf:
                    print(item, end='')

                if i == len(ns.files) - 1 and ns.follow:
                    for l in tail_f(f, ns.sleep_interval):
                        print(l, end='')
                        sys.stdout.flush()
            finally:
                if fname != '-':
                    f.close()

    except Exception as e:
        print('tail :%s' % str(e))
        status = 1
    finally:
        fileinput.close()

    sys.exit(status)
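main() relies on write_header() and tail_f() helpers that are not shown in this snippet. A minimal sketch of what they might look like, assuming tail_f() polls an already-open file object and yields newly appended lines:

import time

def write_header(name):
    # print a tail-style section header for the given file name
    print('==> %s <==' % name)

def tail_f(f, sleep_interval=1.0):
    # yield lines appended to f, polling every sleep_interval seconds
    while True:
        line = f.readline()
        if line:
            yield line
        else:
            time.sleep(sleep_interval)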
Example #40
File: extract.py  Project: mrcuizhe/qapa
def main(args, fout=sys.stdout):

    # print "\t".join(["seqnames", "start", "end", "name", "utr_length", "strand",
    #"lastexon_cds_start", "lastexon_cds_end", "name2",
    #"exonStarts", "exonEnds"])

    # conn = sqlite3.connect(args.db)

    # query = "select gene_biotype, transcript_biotype from ensembl_id where transcript_id = ?"

    conn = pd.read_table(args.db)
    conn = conn.loc[:,
                    ['Transcript stable ID', 'Gene type', 'Transcript type'
                     ]].drop_duplicates()
    conn = conn.set_index(['Transcript stable ID'])

    c = 0
    n = 0
    for row in fileinput.input(args.annotation_file[0],
                               openhook=fileinput.hook_compressed):

        if fileinput.isfirstline() and not args.no_header:
            continue
        n = n + 1

        if re.match(r"^#", row):
            c = c + 1
            continue

        rowobj = Row(row, args.no_header)

        if not args.no_skip_random_chromosomes and \
            rowobj.is_on_random_chromosome():
            c = c + 1
            continue

        # filter for only protein-coding genes
        # result = conn.execute(query, (rowobj.get_stripped_name(),))
        # result = result.fetchone()
        # if result is None or \
        #     not (result[0] == "protein_coding" and \
        #     result[1] == "protein_coding"):
        #         c = c + 1
        #         continue

        # filter for only protein-coding genes
        try:
            result = conn.loc[rowobj.get_stripped_name()]
            if isinstance(result, pd.DataFrame):
                result = result.iloc[0, ]
            if not (result['Gene type'] == "protein_coding"
                    and result['Transcript type'] == "protein_coding"):
                c = c + 1
                continue
        except KeyError:
            c = c + 1
            continue

        bed = rowobj.extract_last_exon()

        if bed is not None:
            fout.write("\t".join([str(x) for x in bed]) + "\n")
        else:
            c = c + 1

    fileinput.close()
    # conn.close()
    if float(c) / float(n) > 0.75:
        print("Warning: %d/%d (%0.2f%%) were skipped. Are you using the "
              "correct database?" % (c, n, float(c) / float(n)),
              file=sys.stderr)
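A hedged invocation sketch: the attribute names mirror those accessed in main() above, the file paths are illustrative, and the Row class used by main() is assumed to be defined elsewhere in the module.

import sys
from types import SimpleNamespace

args = SimpleNamespace(db="ensembl_biomart.txt",          # table with 'Transcript stable ID', 'Gene type', 'Transcript type' columns
                       annotation_file=["annotation.txt.gz"],
                       no_header=False,
                       no_skip_random_chromosomes=False)
main(args, fout=sys.stdout)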
Example #41
def __init__(self, lineno=-1, linecontent="", message=""):
    self.lineno = lineno
    self.linecontent = linecontent
    self.message = message
    fileinput.close()  # release the file currently open via fileinput, if any
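A hedged sketch of how a constructor like this might be used: closing the module-level fileinput stream inside __init__ releases the input file before the exception propagates. The ParseError class name and the config.txt path are illustrative.

import fileinput

class ParseError(Exception):
    def __init__(self, lineno=-1, linecontent="", message=""):
        self.lineno = lineno
        self.linecontent = linecontent
        self.message = message
        fileinput.close()   # release the file opened via fileinput below

for line in fileinput.input(["config.txt"]):
    if "=" not in line:
        raise ParseError(fileinput.filelineno(), line, "missing '=' separator")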
Example #42
def demonstrate_tide_effect():
    day = [[[[0] * ranget for j in xrange(rangey)] for i in xrange(rangex)]
           for d in xrange(8)]
    for d, filename in enumerate(sorted(
            glob.glob(r"../data/pos_hour_user#/*"))):
        print filename
        for line in fileinput.input(filename):
            part = line.strip().split(" ")
            px, py, s, c = int(part[0].split(",")[0]), int(
                part[0].split(",")[1]), int(part[1]), int(part[2])
            day[d][px][py][s] = c
        fileinput.close()

    for dlist, fname in [(range(1, 6), "weekday"),
                         (range(0, 1) + range(6, 8), "weekend")]:
        mask = np.array([[1 if np.array([[day[df][i][j][kf] for kf in xrange(ranget)] for df in dlist]).sum()/len(dlist)>=10*ranget else 0 \
           for j in xrange(rangey)] for i in xrange(rangex)]).sum()
        mesh = [[[sum([day[d][i][j][k] for d in dlist])/len(dlist) if np.array([[day[df][i][j][kf] for kf in xrange(ranget)] for df in dlist]).sum()/len(dlist)>=10*ranget else 0 for k in xrange(ranget)] \
           for j in xrange(rangey)] for i in xrange(rangex)]
        mesh = [[[float(mesh[i][j][k])/sum(mesh[i][j]) if sum(mesh[i][j])!=0 else 0 for k in xrange(ranget)] \
           for j in xrange(rangey)] for i in xrange(rangex)]
        avg = [
            float(
                np.array([[mesh[i][j][k] for j in xrange(rangey)]
                          for i in xrange(rangex)]).sum()) / mask
            for k in xrange(ranget)
        ]
        mesh = [[[mesh[i][j][k]-avg[k] if sum(mesh[i][j])!=0 else 0 for k in xrange(ranget)] \
           for j in xrange(rangey)] for i in xrange(rangex)]
        with open("../data/var/{0}.txt".format(fname), "w") as f:
            for i in xrange(rangex):
                for j in xrange(rangey):
                    if sum(mesh[i][j]) != 0:
                        f.write("{0} {1} {2}\n".format(
                            i, j,
                            " ".join([str(round(x, 6)) for x in mesh[i][j]])))

    plt.figure(figsize=(12, 8))
    levels = arange(-1, 1.1, 0.1)
    cmap, norm = cm.PRGn, cm.colors.Normalize(vmax=1.1, vmin=-1)
    for c, t in enumerate([4, 8, 10, 16, 18, 22]):
        colormap = [[0 for j in xrange(rangey)] for i in xrange(rangex)]
        for line in fileinput.input("../data/var/weekday.txt"):
            part = line.strip().split(" ")
            x, y, f = int(part[0]), int(part[1]), float(part[2:][t])
            colormap[x][y] = f
        fileinput.close()
        cmax = np.array([[abs(colormap[i][j]) for j in xrange(rangey)]
                         for i in xrange(rangex)]).max()
        colormap = [[colormap[i][j] / cmax for j in xrange(rangey)]
                    for i in xrange(rangex)]
        (X, Y), C = meshgrid(np.arange(100),
                             np.arange(100)), np.array(colormap)[20:120,
                                                                 20:120]
        subplot(2, 3, c + 1)
        cset = contourf(X,
                        Y,
                        C.T,
                        levels,
                        cmap=cm.get_cmap("seismic", len(levels)),
                        norm=norm)
        plt.axis([0, 100 - 1, 0, 100 - 1])
        plt.xticks(np.linspace(0, 100, 6))
        plt.yticks(np.linspace(0, 100, 6))
        plt.title('{0}:00'.format(str(t).zfill(2)))
        if c == 0:
            plt.xlabel('Longitude grid index /200m')
            plt.ylabel('Latitude grid index /200m')
        if c == 3:
            subplots_adjust(hspace=0.4)
    subplots_adjust(bottom=0.1, left=0.06, right=0.9, top=0.9)
    cax2 = axes([0.92, 0.10, 0.01, 0.8])
    colorbar(cax=cax2)
    # show()
    for postfix in ('eps', 'png'):
        savefig('../figure/{0}/11.{0}'.format(postfix))
Example #43
def _closeImageFile(self):
    close()  # fileinput.close, presumably imported via 'from fileinput import close'
    # delete and free the file variable, otherwise Windows cannot move the current file
    del self._file
    self._file = None
Example #44
def main():

    print "Processing Info.plist files..."

    MAJORSTR = ""
    MINORSTR = ""
    BUGFIXSTR = ""
    PLUG_VER_STR = ""

    BUNDLE_MFR = ""
    BUNDLE_NAME = ""
    PLUG_NAME_STR = ""
    PLUG_MFR_NAME_STR = ""
    PLUG_CHANNEL_IO = ""
    PLUG_COPYRIGHT = ""
    PLUG_UID = ""
    PLUG_MFR_UID = ""
    PLUG_FACTORY = ""
    PLUG_ENTRY = ""
    PLUG_VIEW_ENTRY = ""
    PLUG_IS_INST = 0
    PLUG_DOES_MIDI = 0

    # extract values from resource.h
    for line in fileinput.input(projectpath + "/resource.h", inplace=0):
        if "#define PLUG_VER " in line:
            PLUG_VER_STR = string.lstrip(line, "#define PLUG_VER ")
            PLUG_VER = int(PLUG_VER_STR, 16)
            MAJOR = PLUG_VER & 0xFFFF0000
            MAJORSTR = str(MAJOR >> 16)
            MINOR = PLUG_VER & 0x0000FF00
            MINORSTR = str(MINOR >> 8)
            BUGFIXSTR = str(PLUG_VER & 0x000000FF)

        if "#define PLUG_NAME " in line:
            PLUG_NAME_STR = string.lstrip(line, "#define PLUG_NAME ")

        if "#define PLUG_MFR " in line:
            PLUG_MFR_NAME_STR = string.lstrip(line, "#define PLUG_MFR ")

        if "#define BUNDLE_MFR " in line:
            BUNDLE_MFR = string.lstrip(line, "#define BUNDLE_MFR ")

        if "#define BUNDLE_NAME " in line:
            BUNDLE_NAME = string.lstrip(line, "#define BUNDLE_NAME ")

        if "#define PLUG_CHANNEL_IO " in line:
            PLUG_CHANNEL_IO = string.lstrip(line, "#define PLUG_CHANNEL_IO ")

        if "#define PLUG_COPYRIGHT " in line:
            PLUG_COPYRIGHT = string.lstrip(line, "#define PLUG_COPYRIGHT ")

        if "#define PLUG_UNIQUE_ID " in line:
            PLUG_UID = string.lstrip(line, "#define PLUG_UNIQUE_ID ")

        if "#define PLUG_MFR_ID " in line:
            PLUG_MFR_UID = string.lstrip(line, "#define PLUG_MFR_ID ")

        if "#define PLUG_ENTRY " in line:
            PLUG_ENTRY = string.lstrip(line, "#define PLUG_ENTRY ")

        if "#define PLUG_FACTORY " in line:
            PLUG_FACTORY = string.lstrip(line, "#define PLUG_FACTORY ")

        if "#define PLUG_VIEW_ENTRY " in line:
            PLUG_VIEW_ENTRY = string.lstrip(line, "#define PLUG_VIEW_ENTRY ")

        if "#define PLUG_IS_INST " in line:
            PLUG_IS_INST = int(string.lstrip(line, "#define PLUG_IS_INST "),
                               16)

        if "#define PLUG_DOES_MIDI " in line:
            PLUG_DOES_MIDI = int(
                string.lstrip(line, "#define PLUG_DOES_MIDI "), 16)

    FULLVERSIONSTR = MAJORSTR + "." + MINORSTR + "." + BUGFIXSTR

    #strip quotes and newlines
    PLUG_VER_STR = PLUG_VER_STR[0:-1]
    BUNDLE_MFR = BUNDLE_MFR[1:-2]
    BUNDLE_NAME = BUNDLE_NAME[1:-2]
    PLUG_NAME_STR = PLUG_NAME_STR[1:-2]
    PLUG_MFR_NAME_STR = PLUG_MFR_NAME_STR[1:-2]
    PLUG_CHANNEL_IO = PLUG_CHANNEL_IO[1:-2]
    PLUG_COPYRIGHT = PLUG_COPYRIGHT[1:-2]
    PLUG_MFR_UID = PLUG_MFR_UID[1:-2]
    PLUG_UID = PLUG_UID[1:-2]
    PLUG_FACTORY = PLUG_FACTORY[0:-1]
    PLUG_ENTRY = PLUG_ENTRY[0:-1]
    PLUG_VIEW_ENTRY = PLUG_VIEW_ENTRY[0:-1]

    CFBundleGetInfoString = BUNDLE_NAME + " v" + FULLVERSIONSTR + " " + PLUG_COPYRIGHT
    CFBundleVersion = FULLVERSIONSTR
    CFBundlePackageType = "BNDL"
    CSResourcesFileMapped = True

    fileinput.close()

    LSMinimumSystemVersion = "10.7.0"

    BASE_SDK = "macosx10.13"
    DEPLOYMENT_TARGET = "10.7.0"

    # extract values from common.xcconfig
    for line in fileinput.input(projectpath + "/../../common.xcconfig",
                                inplace=0):
        if not "//" in line:
            if "BASE_SDK = " in line:
                BASE_SDK = string.lstrip(line, "BASE_SDK = ")
#      if "MACOSX_DEPLOYMENT_TARGET = " in line:
#        DEPLOYMENT_TARGET = string.lstrip(line, "MACOSX_DEPLOYMENT_TARGET = ")

    BASE_SDK = BASE_SDK[0:-1]
    #  DEPLOYMENT_TARGET = DEPLOYMENT_TARGET[0:-1]
    #  DEPLOYMENT_TARGET += ".0"

    LSMinimumSystemVersion = DEPLOYMENT_TARGET

    # VST3

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-VST3-Info.plist"
    vst3 = plistlib.readPlist(plistpath)
    vst3['CFBundleExecutable'] = BUNDLE_NAME
    vst3['CFBundleGetInfoString'] = CFBundleGetInfoString
    vst3[
        'CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".vst3." + BUNDLE_NAME + ""
    vst3['CFBundleName'] = BUNDLE_NAME
    vst3['CFBundleVersion'] = CFBundleVersion
    vst3['CFBundleShortVersionString'] = CFBundleVersion
    vst3['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    vst3['CFBundlePackageType'] = CFBundlePackageType
    vst3['CFBundleSignature'] = PLUG_UID
    vst3['CSResourcesFileMapped'] = CSResourcesFileMapped

    plistlib.writePlist(vst3, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    # VST2

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-VST2-Info.plist"
    vst2 = plistlib.readPlist(plistpath)
    vst2['CFBundleExecutable'] = BUNDLE_NAME
    vst2['CFBundleGetInfoString'] = CFBundleGetInfoString
    vst2[
        'CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".vst2." + BUNDLE_NAME + ""
    vst2['CFBundleName'] = BUNDLE_NAME
    vst2['CFBundleVersion'] = CFBundleVersion
    vst2['CFBundleShortVersionString'] = CFBundleVersion
    vst2['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    vst2['CFBundlePackageType'] = CFBundlePackageType
    vst2['CFBundleSignature'] = PLUG_UID
    vst2['CSResourcesFileMapped'] = CSResourcesFileMapped

    plistlib.writePlist(vst2, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    # AUDIOUNIT

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-AU-Info.plist"
    au = plistlib.readPlist(plistpath)
    au['AudioComponents'] = [{}]
    au['AudioUnit Version'] = PLUG_VER_STR
    au['CFBundleExecutable'] = BUNDLE_NAME
    au['CFBundleGetInfoString'] = CFBundleGetInfoString
    au['CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".audiounit." + BUNDLE_NAME + ""
    au['CFBundleName'] = BUNDLE_NAME
    au['CFBundleVersion'] = CFBundleVersion
    au['CFBundleShortVersionString'] = CFBundleVersion
    au['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    au['CFBundlePackageType'] = CFBundlePackageType
    au['CFBundleSignature'] = PLUG_UID
    au['CSResourcesFileMapped'] = CSResourcesFileMapped

    #Steinberg AU Wrapper stuff

    #Apple 10.7+ SDK stuff
    #https://developer.apple.com/library/mac/technotes/tn2276/_index.html

    if PLUG_IS_INST:
        COMP_TYPE = kAudioUnitType_MusicDevice
    elif PLUG_DOES_MIDI:
        COMP_TYPE = kAudioUnitType_MusicEffect
    else:
        COMP_TYPE = kAudioUnitType_Effect

    #if compiling against 10.6 sdk, delete AudioComponents key
    if (BASE_SDK == "macosx10.5") or (BASE_SDK == "macosx10.6"):
        print "Component manager entry point only"
        if (au['AudioComponents']):
            del au['AudioComponents']
    else:
        print "AudioComponent and Component manager entry points"
        au['AudioComponents'] = [{}]
        au['AudioComponents'][0]['resourceUsage'] = {}

        au['AudioComponents'][0]['description'] = PLUG_NAME_STR
        au['AudioComponents'][0]['factoryFunction'] = PLUG_FACTORY
        au['AudioComponents'][0]['manufacturer'] = PLUG_MFR_UID
        au['AudioComponents'][0][
            'name'] = PLUG_MFR_NAME_STR + ": " + PLUG_NAME_STR
        au['AudioComponents'][0]['subtype'] = PLUG_UID
        au['AudioComponents'][0]['type'] = COMP_TYPE
        au['AudioComponents'][0]['version'] = PLUG_VER

        #Sandbox stuff
        # https://developer.apple.com/library/Mac/technotes/tn2247/_index.html
        au['AudioComponents'][0]['sandboxSafe'] = True
        #au['AudioComponents'][0]['resourceUsage']['temporary-exception.files.all.read-write'] = True

    plistlib.writePlist(au, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    # AAX

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-AAX-Info.plist"
    aax = plistlib.readPlist(plistpath)
    aax['CFBundleExecutable'] = BUNDLE_NAME
    aax['CFBundleGetInfoString'] = CFBundleGetInfoString
    aax['CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".aax." + BUNDLE_NAME + ""
    aax['CFBundleName'] = BUNDLE_NAME
    aax['CFBundleVersion'] = CFBundleVersion
    aax['CFBundleShortVersionString'] = CFBundleVersion
    aax['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    aax['CSResourcesFileMapped'] = CSResourcesFileMapped

    plistlib.writePlist(aax, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    # RTAS

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-RTAS-Info.plist"
    rtas = plistlib.readPlist(plistpath)
    rtas['CFBundleExecutable'] = BUNDLE_NAME
    rtas['CFBundleGetInfoString'] = CFBundleGetInfoString
    rtas[
        'CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".rtas." + BUNDLE_NAME + ""
    rtas['CFBundleName'] = BUNDLE_NAME
    rtas['CFBundleVersion'] = CFBundleVersion
    rtas['CFBundleShortVersionString'] = CFBundleVersion
    rtas['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    rtas['CSResourcesFileMapped'] = CSResourcesFileMapped

    plistlib.writePlist(rtas, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    # APP

    plistpath = projectpath + "/resources/" + BUNDLE_NAME + "-OSXAPP-Info.plist"
    osxapp = plistlib.readPlist(plistpath)
    osxapp['CFBundleExecutable'] = BUNDLE_NAME
    osxapp['CFBundleGetInfoString'] = CFBundleGetInfoString
    osxapp[
        'CFBundleIdentifier'] = "com." + BUNDLE_MFR + ".standalone." + BUNDLE_NAME + ""
    osxapp['CFBundleName'] = BUNDLE_NAME
    osxapp['CFBundleVersion'] = CFBundleVersion
    osxapp['CFBundleShortVersionString'] = CFBundleVersion
    osxapp['LSMinimumSystemVersion'] = LSMinimumSystemVersion
    osxapp['CFBundlePackageType'] = CFBundlePackageType
    osxapp['CFBundleSignature'] = PLUG_UID
    osxapp['CSResourcesFileMapped'] = CSResourcesFileMapped
    osxapp['NSPrincipalClass'] = "SWELLApplication"
    osxapp['NSMainNibFile'] = "MainMenu"
    osxapp['LSApplicationCategoryType'] = "public.app-category.music"
    osxapp['CFBundleIconFile'] = BUNDLE_NAME + ".icns"

    plistlib.writePlist(osxapp, plistpath)
    replacestrs(plistpath, "//Apple//", "//Apple Computer//")

    print "Processing .exp symbol export file..."
Example #45
def generate_genecords_tar(dbname, frame=None, outbox=None, GUI="n"):
  if GUI == "n":    
    print("Generating genecords TAR archive")
    log("Generating genecords TAR archive")
  #Create genecords directory if nonexistent
  try:
    os.mkdir("genecords")
  except:
    pass
  #genecordslistdict = {}

  #Read gene coordinates input from genbank_mf_all/txt
  passedtags = []
  genome_info = {}
  lasttag = ""
  for i in fileinput.input(dbname + "_all.txt"):
    if GUI == "y":
      frame.update()
    if len(i) > 0:
      i = i.replace(">","").replace("\n","")
      tabs = i.split("|")
      protein = tabs[3]
      genome = tabs[0]
      tag = genome[:5].upper()
      #If new genome is reached, save data in pickle file
      if tag != lasttag and not fileinput.isfirstline():
        if GUI == "y":
          frame.update()
        #Load previous data if available
        if lasttag in passedtags:
          pickle_file = open("genecords" + os.sep + lasttag + ".pickle", "rb")
          previous_data = pickle.load(pickle_file)
          for key in previous_data:
            if key in genome_info:
              genome_info[key].extend(previous_data[key])
            else:
              genome_info[key] = previous_data[key]
        else:
          passedtags.append(lasttag)
        save_to_pickle(lasttag, genome_info)
        genome_info = {}
      if genome in genome_info:
        genome_info[genome].append(i)
      else:
        genome_info[genome] = [i]
      lasttag = tag
        
  #Repeat data saving
  if lasttag in passedtags:
    pickle_file = open("genecords" + os.sep + lasttag + ".pickle", "rb")
    previous_data = pickle.load(pickle_file)
    for key in previous_data:
      if key in genome_info:
        genome_info[key].extend(previous_data[key])
      else:
        genome_info[key] = previous_data[key]
  else:
    passedtags.append(lasttag)
  save_to_pickle(lasttag, genome_info)

  
  fileinput.close()

  #Sort dictionary by size
  #sortedgenecordskeylist = sortdictkeysbyvalues(genecordslistdict)

  #Archive directory as TAR file and remove original directory
  try:
    if GUI == "y":
      frame.update()
    tar = tarfile.open(dbname + ".cords.tar", "w")
    tar.add("genecords")
    tar.close()
  except:
    print("Could not create TAR file from genecords folder. Please create archive manually.")
    log("Could not create TAR file from genecords folder. Please create archive manually.", exit=True)
  if GUI == "y":
    frame.update()
  try:
    shutil.rmtree("genecords")
  except:
    pass
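save_to_pickle() is assumed above but not shown; a minimal sketch consistent with how the pickle files are read back earlier in this function:

import os
import pickle

def save_to_pickle(tag, genome_info):
  # dump the accumulated per-genome records for one five-letter tag into
  # genecords/<TAG>.pickle, matching the path opened for reading above
  with open("genecords" + os.sep + tag + ".pickle", "wb") as pickle_file:
    pickle.dump(genome_info, pickle_file)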
Example #46
def main(args):
    global _stash
    ap = argparse.ArgumentParser()
    ap.add_argument('pattern', help='the pattern to match')
    ap.add_argument('files', nargs='*', help='files to be searched')
    ap.add_argument('-i',
                    '--ignore-case',
                    action='store_true',
                    help='ignore case while searching')
    ap.add_argument('-v',
                    '--invert',
                    action='store_true',
                    help='invert the search result')
    ap.add_argument('-c',
                    '--count',
                    action='store_true',
                    help='count the search results instead of normal output')
    ns = ap.parse_args(args)

    flags = 0
    if ns.ignore_case:
        flags |= re.IGNORECASE

    pattern = re.compile(ns.pattern, flags=flags)

    # Do not try to grep directories
    files = [f for f in ns.files if not os.path.isdir(f)]

    fileinput.close()  # in case it is not closed
    try:
        counts = collections.defaultdict(int)
        for line in fileinput.input(files,
                                    openhook=fileinput.hook_encoded("utf-8")):
            if bool(pattern.search(line)) != ns.invert:
                if ns.count:
                    counts[fileinput.filename()] += 1
                else:
                    if ns.invert:  # optimize: if ns.invert, then no match, so no highlight color needed
                        newline = line
                    else:
                        newline = re.sub(
                            pattern,
                            lambda m: _stash.text_color(m.group(), 'red'),
                            line)
                    if fileinput.isstdin():
                        fmt = u'{lineno}: {line}'
                    else:
                        fmt = u'{filename}: {lineno}: {line}'

                    print(
                        fmt.format(filename=fileinput.filename(),
                                   lineno=fileinput.filelineno(),
                                   line=newline.rstrip()))

        if ns.count:
            for filename, count in counts.items():
                fmt = u'{count:6} {filename}'
                print(fmt.format(filename=filename, count=count))

    except Exception as err:
        print("grep: {}: {!s}".format(type(err).__name__, err),
              file=sys.stderr)
    finally:
        fileinput.close()
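A hedged invocation sketch: this grep is written for the StaSh shell on Pythonista, which supplies the global _stash used for match highlighting; counting matches with -c avoids that code path, so the call below works on its own (the file name is illustrative).

main(['-i', '-c', 'error', 'server.log'])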
Example #47
def main(args, fout=sys.stdout):
    if args.debug:
        logger.setLevel(logging.DEBUG)

    conn = pd.read_table(args.db)
    conn = conn.loc[:,
                    ['Transcript stable ID', 'Gene type', 'Transcript type'
                     ]].drop_duplicates()
    conn = conn.set_index(['Transcript stable ID'])

    max_warnings = 10
    w = 0
    c = 0
    n = 0
    bad_chroms = set()
    for row in fileinput.input(args.annotation_file[0],
                               openhook=fileinput.hook_compressed):
        n = n + 1

        if fileinput.isfirstline():
            if row.startswith("#bin") or row.startswith("bin"):
                logger.debug("Header detected in genePred file. Assuming UCSC"
                             " format.")
                continue
            logger.debug("No header detected. Assuming custom genePred.")

        if row.startswith("#"):
            continue

        rowobj = Row(row)

        if not args.no_skip_random_chromosomes and \
            rowobj.is_on_random_chromosome():
            c = c + 1
            continue

        if rowobj.chromosome_contains_underscore():
            w = w + 1

            if rowobj.chrom not in bad_chroms:
                logger.warning("Skipping chromosome %s because it contains"
                               " underscores" % rowobj.chrom)
                bad_chroms.add(rowobj.chrom)
            continue

        # filter for only protein-coding genes
        try:
            result = conn.loc[get_stripped_name(rowobj.name)]
            if isinstance(result, pd.DataFrame):
                result = result.iloc[0, ]
            if not (result['Gene type'] == "protein_coding"
                    and result['Transcript type'] == "protein_coding"):
                c = c + 1
                continue
        except KeyError:
            c = c + 1
            continue

        bed = rowobj.extract_last_exon()

        if bed is not None:
            fout.write("\t".join([str(x) for x in bed]) + "\n")
        else:
            c = c + 1

    fileinput.close()
    if float(c) / float(n) > 0.75:
        logger.warning("%d/%d (%0.2f%%) were skipped. Are you using the "
                       "correct database?" % (c, n, float(c) / float(n)))
Example #48
def demonstrate_clusters():
    from sklearn.cluster import KMeans
    from scipy import interpolate
    from matplotlib.ticker import MultipleLocator, FormatStrFormatter

    plist, X = [], []
    for line in fileinput.input("../data/var/weekday.txt"):
        part = line.strip().split(" ")
        x, y, f = int(part[0]), int(part[1]), [float(i) for i in part[2:]]
        plist.append([x, y])
        X.append(f)
    fileinput.close()

    k_means = KMeans(init='k-means++', n_clusters=3, n_init=10)
    k_means.fit(X)

    mesh = [[0 for j in xrange(rangey)] for i in xrange(rangex)]
    for i in xrange(len(k_means.labels_)):
        if k_means.labels_[i] == 0:
            mesh[plist[i][0]][plist[i][1]] = 1.5
        if k_means.labels_[i] == 1:
            mesh[plist[i][0]][plist[i][1]] = 0.6
        if k_means.labels_[i] == 2:
            mesh[plist[i][0]][plist[i][1]] = -1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    (X, Y), C = meshgrid(np.arange(100),
                         np.arange(100)), np.array(mesh)[20:120, 20:120]
    pcolormesh(X, Y, C.T, cmap='RdBu', vmin=-2, vmax=2)
    plt.axis([0, 100 - 1, 0, 100 - 1])
    plt.xlabel('Longitude grid index /200m')
    plt.ylabel('Latitude grid index /200m')
    # plt.show()
    for postfix in ('eps', 'png'):
        savefig('../figure/{0}/13.{0}'.format(postfix))

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    for _cluster, linestyle, label in [(0, 'k-', "Cluster 1"),
                                       (1, 'k--', "Cluster 2"),
                                       (2, 'k:', "Cluster 3")]:
        x, y = [i for i in xrange(ranget)], k_means.cluster_centers_[_cluster]
        tck = interpolate.splrep(x, y, s=0)
        xnew = np.arange(0, 23, 0.1)
        ynew = interpolate.splev(xnew, tck, der=0)
        plt.plot(xnew, ynew, linestyle, label=label, linewidth=2)
    plt.plot([0, 23], [0, 0], 'k--')
    plt.xlim(0, 23)
    plt.ylim(-0.03, 0.03)
    plt.xlabel('Time /hour')
    plt.ylabel('Differentiate index')
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels)
    xmajorLocator = MultipleLocator(1)
    xmajorFormatter = FormatStrFormatter('%d')
    ax1.xaxis.set_major_locator(xmajorLocator)
    ax1.xaxis.set_major_formatter(xmajorFormatter)
    # show()
    for postfix in ('eps', 'png'):
        savefig('../figure/{0}/14.{0}'.format(postfix))
Example #49
def headquarters():
    positive = 0
    negative = 0
    not_found = 0

    f_not_found = open("not_found.txt", "w")
    f_negative = open("negative.txt", "w")
    f_positive = open("positive.txt", "w")

    tuples_not_found = set()

    for t in results:
        # first, try a direct match
        org_extracted = t[0].decode("utf8").upper().strip()
        locations_groundtruth = ground_truth.get(org_extracted)

        # if its a direct match with a ground truth organization, compare the locations
        if locations_groundtruth:
            loc_extracted = t[1].decode("utf8").upper().strip()
            found = False
            for locations in locations_groundtruth:
                # some locations in DBpedia contain diferente references, e.g., city,state
                # e.g.,: AUBURN HILLS, MICHIGAN
                # split and compare with both

                # in case it was found and got outside the for-loop below
                # no need to check more references
                if found == True:
                    break
                locations_parts = locations.split(",")
                for loc in locations_parts:
                    # match locations with Jaro-Winkler, keep those >=0.8 similarity score
                    score = jellyfish.jaro_winkler(
                        loc_extracted.encode("utf8"),
                        loc.strip().encode("utf8"))
                    if score >= 0.8:
                        f_positive.write(t[0] + '\t' + t[1] + '\n')
                        positive += 1
                        found = True
                        break

                    # if ground-truth (from DBpedia) is a country, and extracted is a city
                    # check if the city is in that country
                    elif loc in countries:
                        if loc_extracted.encode("utf8") in country_cities[loc]:
                            f_positive.write(t[0] + '\t' + t[1] + '\t' + '\n')
                            positive += 1
                            found = True
                            break

                    #TODO
                    # if ground-truth (from DBpedia) is a city, and extracted location is a country
                    # check if that city is located in that country only
                    # elif

            if found == False:
                negative += 1
                f_negative.write(
                    t[0] + '\t' + t[1] + '\t\t:' +
                    ';'.join(locations_groundtruth).encode("utf8") + '\n')

        else:
            tuples_not_found.add(t)

    # try to expand the acronyms
    names_found = set()
    for name in tuples_not_found:
        # if it is a single token with all uppercase letters
        if len(name[0].split()) == 1 and name[0].isupper():
            found = False
            # get all the possible expansions that match this acronym
            expansions = acronyms.get(name[0])
            if expansions:
                # check if any of these expansions is an organization in the
                # ground_truth database and if it is, extract the locations
                for e in expansions:
                    locations_groundtruth = ground_truth.get(e.upper())
                    if locations_groundtruth:
                        for location in locations_groundtruth:
                            locations_parts = location.split(",")
                            for loc in locations_parts:
                                # approximate similarity
                                score = jellyfish.jaro_winkler(
                                    loc.encode("utf8"), name[1].upper())
                                if score >= 0.8:
                                    #f_positive.write(name[0]+' ('+e+')\t'+name[1]+'\t'+str(avg_score)+'\n')
                                    f_positive.write(name[0] + ' (' + e +
                                                     ')\t' + name[1] + '\n')
                                    positive += 1
                                    found = True
                                    names_found.add(name)
                                    break

                        if (found == True):
                            break

    for n in names_found:
        tuples_not_found.remove(n)

    # for tuples not found query Freebase
    # cache of strings that were already queried to Freebase
    queried = []
    for line in fileinput.input(
            '/home/dsbatista/gigaword/ground-truth/freebase-queried.txt'):
        queried.append(line.strip())
    fileinput.close()

    # file to save Freebase query results
    output = codecs.open(
        '/home/dsbatista/gigaword/ground-truth/freebase-output.txt', 'a',
        "utf-8")

    # open file for append, update 'freebase-queried.txt' with new issue queries
    f_queried = open(
        '/home/dsbatista/gigaword/ground-truth/freebase-queried.txt', "a")

    tuples_found = []

    for t in tuples_not_found:
        org = t[0].strip()
        # for now do not query acronyms to Freebase with ~=, too many false positives
        if not (len(t[0].split()) == 1 and t[0].isupper()):
            # first check if that query string was already issued to Freebase
            # if not, query Freebase and save the result
            if org not in queried:
                if org == "Star-Times": continue
                response = queryFreebase(org)
                queried.append(org)
                if response != 'error':
                    try:
                        if response['result']:
                            print "found:\t", org
                            parseResponse(org, response, output)
                        else:
                            print "not found:\t", org
                        f_queried.write(org + '\n')
                        f_queried.flush()

                    except TypeError, e:
                        print org
                        print e
                        print response
                        f_queried.close()
                        output.close()
                        sys.exit(0)

                    except Exception, e:
                        print org
                        print e
                        print response
                        f_queried.close()
                        output.close()
                        sys.exit(0)
Example #50
def build_ino(arduino_location, libraries_location, ino_file, com_num):
    # ===========================
    # create some background data
    # these need to reflect the details of your system

    # where is the Arduino program
    arduinoIdeVersion = {
        "1.5.6-r2": arduino_location,
        "1.6.3": arduino_location
    }

    # where are libraries stored
    arduinoExtraLibraries = libraries_location

    # where this program will store stuff
    # these directories will be beside this Python program
    compileDirName = "ArduinoTemp"
    archiveDirName = "ArduinoUploadArchive"

    # default build options
    build_options = {
        "action": "upload",
        "board": "arduino:avr:uno",
        "port": "COM" + str(com_num),
        "ide": "1.5.6-r2"
    }

    # some other important variables - just here for easy reference
    archiveRequired = False
    usedLibs = []
    hFiles = []

    # ============================
    # ensure directories exist
    # and empty the compile directory

    # first the directory used for compiling
    pythonDir = os.path.dirname(os.path.realpath(__file__))
    compileDir = os.path.join(pythonDir, compileDirName)
    if not os.path.exists(compileDir):
        os.makedirs(compileDir)

    existingFiles = os.listdir(compileDir)
    for f in existingFiles:
        os.remove(os.path.join(compileDir, f))

    # then the directory where the Archives are saved
    archiveDir = os.path.join(pythonDir, archiveDirName)
    if not os.path.exists(archiveDir):
        os.makedirs(archiveDir)

    # =============================
    # get the .ino file and figure out the build options
    # the stuff in the .ino file will have this format
    # and will start at the first line in the file
    # // python-build-start
    # // python-build-end

    inoFileName = ino_file
    inoBaseName, inoExt = os.path.splitext(os.path.basename(inoFileName))
    ''' kept for when dynamic parsing is added
    numLines = 1  # in case there is no end-line
    maxLines = 6
    buildError = ""
    if inoExt.strip() == ".ino":
        codeFile = open(inoFileName, 'r')
        startLine = codeFile.readline()[3:].strip()
        if startLine == "python-build-start":
            nextLine = codeFile.readline()[3:].strip()
            while nextLine != "python-build-end":
                buildCmd = nextLine.split(',')
                if len(buildCmd) > 1:
                    buildOptions[buildCmd[0].strip()] = buildCmd[1].strip()
                numLines += 1
                if numLines >= maxLines:
                    buildError = "No end line"
                    break
                nextLine = codeFile.readline()[3:].strip()
        else:
            buildError = "No start line"
    else:
        buildError = "Not a .ino file"

    if len(buildError) > 0:
        print("Sorry, can't process file - %s" % buildError)
    '''

    # print build options
    print("BUILD OPTIONS")
    for n, m in build_options.items():
        print("%s  %s" % (n, m))

    # =============================
    # get the program filename for the selected IDE
    arduinoProg = arduinoIdeVersion[build_options["ide"]]

    # =============================
    # prepare archive stuff
    #
    # create name of directory to save the code = name-yyyymmdd-hhmmss
    # this will go inside the directory archiveDir
    inoArchiveDirName = inoBaseName + time.strftime("-%Y%m%d-%H%M%S")
    # note this directory will only be created if there is a successful upload
    # the name is figured out here to be written into the .ino file so it can be printed by the Arduino code
    # it will appear as char archiveDirName[] = "nnnnn";

    # if the .ino file does not have a line with char archiveDirName[] then it will be assumed
    # that no archiving is required
    # check for existence of line
    for line in fileinput.input(inoFileName):
        if "char archiveDirName[]" in line:
            archiveRequired = True
            break
    fileinput.close()

    if archiveRequired:
        for line in fileinput.input(inoFileName, inplace=1):
            if "char archiveDirName[]" in line:
                print('char archiveDirName[] = "%s";' % inoArchiveDirName)
            else:
                print(line.rstrip())
        fileinput.close()
    # ~ os.utime(inoFileName, None)

    # =============================
    # figure out what libraries and .h files are used
    # if there are .h files they will need to be copied to ArduinoTemp

    # first get the list of all the extra libraries that exist
    extraLibList = os.listdir(arduinoExtraLibraries)

    # go through the .ino file to get any lines with #include
    includeLines = []
    for line in fileinput.input(inoFileName):
        if "#include" in line:
            includeLines.append(line.strip())
    fileinput.close()
    print("#INCLUDE LINES")
    print(includeLines)

    # now look for lines with < signifying libraries
    for n in includeLines:
        angleLine = n.split('<')
        if len(angleLine) > 1:
            lib_name = angleLine[1].split('>')
            lib_name = lib_name[0].split('.')
            lib_name = lib_name[0].strip()
            # add the name to usedLibs if it is in the extraLibList
            if lib_name in extraLibList:
                usedLibs.append(lib_name)
    print("LIBS TO BE ARCHIVED")
    print(usedLibs)

    # then look for lines with " signifiying a reference to a .h file
    # NB the name will be a full path name
    for n in includeLines:
        quoteLine = n.split('"')
        if len(quoteLine) > 1:
            hName = quoteLine[1].split('"')
            hName = hName[0].strip()
            # add the name to hFiles
            hFiles.append(hName)
    print(".h FILES TO BE ARCHIVED")
    print(hFiles)

    # ==============================
    # copy the .ino file to the directory compileDir and change its name to match the directory
    saveFile = os.path.join(compileDir, compileDirName + ".ino")
    shutil.copy(inoFileName, saveFile)

    # ===============================
    # generate the Arduino command
    arduino_command = "%s --%s --board %s --port %s %s" % (
        arduinoProg, build_options["action"], build_options["board"],
        build_options["port"], saveFile)
    print("ARDUINO COMMAND")
    print(arduino_command)

    # ===============================
    # call the IDE
    print("STARTING ARDUINO -- %s\n" % (build_options["action"]))

    presult = subprocess.call([
        arduinoProg,
        "--%s" % build_options["action"], "--board", build_options["board"],
        "--port", build_options["port"], saveFile
    ],
                              shell=True)

    if presult != 0:
        raise SystemError("Error, wrong COM number")
    else:
        print("\nARDUINO SUCCESSFUL")

    # ================================
    # after a successful upload we may need to archive the code
    if archiveRequired:
        print("\nARCHIVING")
        # create the Archive directory
        ar_dir = os.path.join(archiveDir, inoArchiveDirName)
        print(ar_dir)
        # this ought to be a unique name - hence no need to check for duplicates
        os.makedirs(ar_dir)
        # copy the code into the new directory
        shutil.copy(inoFileName, ar_dir)
        # copy the .h files to the new directory
        for n in hFiles:
            shutil.copy(n, ar_dir)
        # copy the used libraries to the new directory
        for n in usedLibs:
            lib_name = os.path.join(arduinoExtraLibraries, n)
            dest_dir = os.path.join(ar_dir, "libraries", n)
            shutil.copytree(lib_name, dest_dir)
        print("\nARCHIVING DONE")
Example #51
def bs_single_end(main_read_file, asktag, adapter_file, cut1, cut2,
                  no_small_lines, max_mismatch_no, aligner_command, db_path,
                  tmp_path, outfile, XS_pct, XS_count, XSteve,
                  adapter_mismatch, show_multiple_hit, show_unmapped_hit):
    logm("----------------------------------------------")
    logm("Read filename: %s" % main_read_file)
    logm("The first base (for mapping): %d" % cut1)
    logm("The  last base (for mapping): %d" % cut2)
    logm("Path for short reads aligner: %s" % aligner_command + '\n')
    logm("Reference genome library path: %s" % db_path)
    if asktag == "Y":
        logm("Un-directional library")
    else:
        logm("Directional library")
    # end-of-if
    logm("Number of mismatches allowed: %s" % str(max_mismatch_no))
    # adapter : strand-specific or not
    adapter = ""
    adapter_fw = ""
    adapter_rc = ""
    if adapter_file != "":
        try:
            adapter_inf = open(adapter_file, "r")
            if asktag == "N":  #<--- directional library
                adapter = adapter_inf.readline()
                adapter_inf.close()
                adapter = adapter.rstrip("\n")[0:10]
            elif asktag == "Y":  #<--- un-directional library
                adapter_fw = adapter_inf.readline()
                adapter_rc = adapter_inf.readline()
                adapter_inf.close()
                adapter_fw = adapter_fw.rstrip("\n")[0:10]
                adapter_rc = adapter_rc.rstrip("\n")[-10::]
                if adapter_rc == "":
                    adapter_rc = reverse_compl_seq(adapter_fw)
            adapter_inf.close()
        except IOError:
            print "[Error] Cannot open adapter file : %s" % adapter_file
            exit(-1)
    if adapter_file != "":
        if asktag == "N":  #<--- directional library
            logm("Adapter sequence: %s" % adapter)
        elif asktag == "Y":
            logm("3\' end adapter sequence: %s" % adapter_fw)
            logm("5\' end adapter sequence: %s" % adapter_rc)
    logm("-------------------------------- ")

    # helper method to join fname with tmp_path
    tmp_d = lambda fname: os.path.join(tmp_path, fname)
    db_d = lambda fname: os.path.join(db_path, fname)

    # splitting the big read file
    input_fname = os.path.split(main_read_file)[1]

    #---- Stats ------------------------------------------------------------
    all_raw_reads = 0
    all_trimmed = 0
    all_mapped = 0
    all_mapped_passed = 0
    all_base_before_trim = 0
    all_base_after_trim = 0
    all_base_mapped = 0
    numbers_premapped_lst = [0, 0, 0, 0]
    numbers_mapped_lst = [0, 0, 0, 0]
    mC_lst = [0, 0, 0]
    uC_lst = [0, 0, 0]
    no_my_files = 0
    #----------------------------------------------------------------

    if show_multiple_hit is not None:
        outf_MH = open(show_multiple_hit, 'w')
    if show_unmapped_hit is not None:
        outf_UH = open(show_unmapped_hit, 'w')

    for read_file in isplit_file(main_read_file,
                                 tmp_d(input_fname) + '-s-', no_small_lines):
        #    for read_file in my_files:
        original_bs_reads = {}
        no_my_files += 1
        random_id = ".tmp-" + str(random.randint(1000000, 9999999))
        #-------------------------------------------------------------------
        # un-directional sequencing
        #-------------------------------------------------------------------
        if asktag == "Y":

            #----------------------------------------------------------------
            outfile2 = tmp_d('Trimmed_C2T.fa' + random_id)
            outfile3 = tmp_d('Trimmed_G2A.fa' + random_id)

            outf2 = open(outfile2, 'w')
            outf3 = open(outfile3, 'w')

            #----------------------------------------------------------------
            # detect format of input file
            try:
                if read_file.endswith(
                        ".gz"):  # support input file ending with ".gz"
                    read_inf = gzip.open(read_file, "rb")
                else:
                    read_inf = open(read_file, "r")
            except IOError:
                print "[Error] Cannot open input file : %s" % read_file
                exit(-1)

            logm("Start reading and trimming the input sequences")
            oneline = read_inf.readline()
            if oneline == "":
                oneline = "NNNN"
            l = oneline.split()
            input_format = ""
            if oneline[0] == "@":
                input_format = "fastq"
            elif len(l) == 1 and oneline[0] != ">":
                input_format = "seq"
            elif len(l) == 11:
                input_format = "qseq"
            elif oneline[0] == ">":
                input_format = "fasta"
            read_inf.close()

            #----------------------------------------------------------------
            # read sequence, remove adapter and convert
            read_id = ""
            seq = ""
            seq_ready = "N"
            line_no = 0
            fw_trimmed = 0
            rc_trimmed = 0
            for line in fileinput.input(read_file,
                                        openhook=fileinput.hook_compressed
                                        ):  # allow input with .gz
                if line == "":  # fix bug for empty input line
                    line = "NNNN"
                l = line.split()
                line_no += 1
                if input_format == "seq":
                    all_raw_reads += 1
                    read_id = str(all_raw_reads)
                    read_id = read_id.zfill(12)
                    seq = l[0]
                    seq_ready = "Y"
                elif input_format == "fastq":
                    l_fastq = math.fmod(line_no, 4)
                    if l_fastq == 1:
                        all_raw_reads += 1
                        read_id = l[0][1:]
                        seq_ready = "N"
                    elif l_fastq == 2:
                        seq = l[0]
                        seq_ready = "Y"
                    else:
                        seq = ""
                        seq_ready = "N"
                elif input_format == "qseq":
                    all_raw_reads += 1
                    read_id = str(all_raw_reads)
                    read_id = read_id.zfill(12)
                    seq = l[8]
                    seq_ready = "Y"
                elif input_format == "fasta":
                    l_fasta = math.fmod(line_no, 2)
                    if l_fasta == 1:
                        all_raw_reads += 1
                        read_id = l[0][1:]
                        seq = ""
                        seq_ready = "N"
                    elif l_fasta == 0:
                        seq = l[0]
                        seq_ready = "Y"

                #----------------------------------------------------------------
                if seq_ready == "Y":
                    seq = seq[cut1 -
                              1:cut2]  #<---- selecting 0..52 from 1..72  -e 52
                    seq = seq.upper()
                    seq = seq.replace(".", "N")

                    # striping BS adapter from 3' read
                    all_base_before_trim += len(seq)
                    if (adapter_fw != "") or (adapter_rc != ""):
                        new_read = RemoveAdapter(seq, adapter_fw,
                                                 adapter_mismatch)
                        if len(new_read) < len(seq):
                            fw_trimmed += 1
                        new_read_len = len(new_read)
                        #print new_read
                        new_read = Remove_5end_Adapter(new_read, adapter_rc,
                                                       adapter_mismatch)
                        new_read = RemoveAdapter(new_read, adapter_fw,
                                                 adapter_mismatch)
                        if len(new_read) < new_read_len:
                            rc_trimmed += 1
                        #print new_read
                        if len(new_read) < len(seq):
                            all_trimmed += 1
                        seq = new_read
                    all_base_after_trim += len(seq)
                    if len(seq) <= 4:
                        seq = ''.join(["N" for x in xrange(cut2 - cut1 + 1)])

                    #---------  trimmed_raw_BS_read  ------------------
                    original_bs_reads[read_id] = seq

                    #---------  FW_C2T  ------------------
                    outf2.write('>%s\n%s\n' % (read_id, seq.replace("C", "T")))
                    #---------  RC_G2A  ------------------
                    outf3.write('>%s\n%s\n' % (read_id, seq.replace("G", "A")))

            fileinput.close()

            outf2.close()
            outf3.close()

            delete_files(read_file)
            logm("Reads trimmed from 3\' end : %d " % fw_trimmed)
            logm("Reads trimmed from 5\' end : %d " % rc_trimmed)
            #--------------------------------------------------------------------------------
            # Bowtie mapping
            #-------------------------------------------------------------------------------
            logm("Start mapping")
            WC2T = tmp_d("W_C2T_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)
            CC2T = tmp_d("C_C2T_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)
            WG2A = tmp_d("W_G2A_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)
            CG2A = tmp_d("C_G2A_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)

            #    print aligner_command % {'int_no_mismatches' : int_no_mismatches,
            #                             'reference_genome' : os.path.join(db_path,'W_C2T'),
            #                             'input_file' : outfile2,
            #                             'output_file' : WC2T}

            run_in_parallel([
                aligner_command % {
                    'reference_genome': os.path.join(db_path, 'W_C2T'),
                    'input_file': outfile2,
                    'output_file': WC2T
                }, aligner_command % {
                    'reference_genome': os.path.join(db_path, 'C_C2T'),
                    'input_file': outfile2,
                    'output_file': CC2T
                }, aligner_command % {
                    'reference_genome': os.path.join(db_path, 'W_G2A'),
                    'input_file': outfile3,
                    'output_file': WG2A
                }, aligner_command % {
                    'reference_genome': os.path.join(db_path, 'C_G2A'),
                    'input_file': outfile3,
                    'output_file': CG2A
                }
            ])

            delete_files(outfile2, outfile3)

            #--------------------------------------------------------------------------------
            # Post processing
            #--------------------------------------------------------------------------------

            FW_C2T_U, FW_C2T_R = extract_mapping(WC2T)
            RC_G2A_U, RC_G2A_R = extract_mapping(CG2A)

            FW_G2A_U, FW_G2A_R = extract_mapping(WG2A)
            RC_C2T_U, RC_C2T_R = extract_mapping(CC2T)

            #----------------------------------------------------------------
            # get unique-hit reads
            #----------------------------------------------------------------
            Union_set = set(FW_C2T_U.iterkeys()) | set(
                RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(
                    RC_C2T_U.iterkeys())

            Unique_FW_C2T = set()  # +
            Unique_RC_G2A = set()  # +
            Unique_FW_G2A = set()  # -
            Unique_RC_C2T = set()  # -
            Multiple_hits = set()

            for x in Union_set:
                _list = []
                for d in [FW_C2T_U, RC_G2A_U, FW_G2A_U, RC_C2T_U]:
                    mis_lst = d.get(x, [99])
                    mis = int(mis_lst[0])
                    _list.append(mis)
                for d in [FW_C2T_R, RC_G2A_R, FW_G2A_R, RC_C2T_R]:
                    mis = d.get(x, 99)
                    _list.append(mis)
                mini = min(_list)
                if _list.count(mini) == 1:
                    mini_index = _list.index(mini)
                    if mini_index == 0:
                        Unique_FW_C2T.add(x)
                    elif mini_index == 1:
                        Unique_RC_G2A.add(x)
                    elif mini_index == 2:
                        Unique_FW_G2A.add(x)
                    elif mini_index == 3:
                        Unique_RC_C2T.add(x)
                    # if mini_index = 4,5,6,7, indicating multiple hits
                    else:
                        Multiple_hits.add(x)
                else:
                    Multiple_hits.add(x)
            # write reads rejected by Multiple Hits to file
            if show_multiple_hit is not None:
                #outf_MH=open(show_multiple_hit,'w')
                for i in Multiple_hits:
                    outf_MH.write(">%s\n" % i)
                    outf_MH.write("%s\n" % original_bs_reads[i])
                #outf_MH.close()

            # write unmapped reads to file
            if show_unmapped_hit is not None:
                #outf_UH=open(show_unmapped_hit,'w')
                for i in original_bs_reads:
                    if i not in Union_set:
                        outf_UH.write(">%s\n" % i)
                        outf_UH.write("%s\n" % original_bs_reads[i])
                #outf_UH.close()

            del Union_set
            del FW_C2T_R
            del FW_G2A_R
            del RC_C2T_R
            del RC_G2A_R

            FW_C2T_uniq_lst = [[FW_C2T_U[u][1], u] for u in Unique_FW_C2T]
            FW_G2A_uniq_lst = [[FW_G2A_U[u][1], u] for u in Unique_FW_G2A]
            RC_C2T_uniq_lst = [[RC_C2T_U[u][1], u] for u in Unique_RC_C2T]
            RC_G2A_uniq_lst = [[RC_G2A_U[u][1], u] for u in Unique_RC_G2A]
            FW_C2T_uniq_lst.sort()
            RC_C2T_uniq_lst.sort()
            FW_G2A_uniq_lst.sort()
            RC_G2A_uniq_lst.sort()
            FW_C2T_uniq_lst = [x[1] for x in FW_C2T_uniq_lst]
            RC_C2T_uniq_lst = [x[1] for x in RC_C2T_uniq_lst]
            FW_G2A_uniq_lst = [x[1] for x in FW_G2A_uniq_lst]
            RC_G2A_uniq_lst = [x[1] for x in RC_G2A_uniq_lst]
            #----------------------------------------------------------------

            numbers_premapped_lst[0] += len(Unique_FW_C2T)
            numbers_premapped_lst[1] += len(Unique_RC_G2A)
            numbers_premapped_lst[2] += len(Unique_FW_G2A)
            numbers_premapped_lst[3] += len(Unique_RC_C2T)

            del Unique_FW_C2T
            del Unique_FW_G2A
            del Unique_RC_C2T
            del Unique_RC_G2A

            #----------------------------------------------------------------

            nn = 0
            gseq = dict()
            chr_length = dict()
            for ali_unique_lst, ali_dic in [(FW_C2T_uniq_lst, FW_C2T_U),
                                            (RC_G2A_uniq_lst, RC_G2A_U),
                                            (FW_G2A_uniq_lst, FW_G2A_U),
                                            (RC_C2T_uniq_lst, RC_C2T_U)]:
                nn += 1

                for header in ali_unique_lst:

                    _, mapped_chr, mapped_location, cigar = ali_dic[header]

                    original_BS = original_bs_reads[header]
                    #-------------------------------------
                    if mapped_chr not in gseq:
                        gseq[mapped_chr] = deserialize(db_d(mapped_chr))
                        chr_length[mapped_chr] = len(gseq[mapped_chr])

                    if nn == 2 or nn == 3:
                        cigar = list(reversed(cigar))
                    r_start, r_end, g_len = get_read_start_end_and_genome_length(
                        cigar)

                    all_mapped += 1

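                    # The branches below assign strand labels; for hits against the
                    # reverse-complemented (C_*) references, the reported position is converted
                    # back to forward-strand coordinates (chr_length - location - g_len) and the
                    # read is reverse-complemented where needed.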
                    if nn == 1:  # +FW mapped to + strand:
                        FR = "+FW"
                        mapped_strand = "+"

                    elif nn == 2:  # +RC mapped to + strand:
                        FR = "+RC"  # RC reads from -RC reflecting the methylation status on Watson strand (+)
                        mapped_location = chr_length[
                            mapped_chr] - mapped_location - g_len
                        mapped_strand = "+"
                        original_BS = reverse_compl_seq(
                            original_BS)  # for RC reads

                    elif nn == 3:  # -RC mapped to - strand:
                        mapped_strand = "-"
                        FR = "-RC"  # RC reads from +RC reflecting the methylation status on Crick strand (-)
                        original_BS = reverse_compl_seq(
                            original_BS)  # for RC reads

                    elif nn == 4:  # -FW mapped to - strand:
                        mapped_strand = "-"
                        FR = "-FW"
                        mapped_location = chr_length[
                            mapped_chr] - mapped_location - g_len

                    origin_genome, next, output_genome = get_genomic_sequence(
                        gseq[mapped_chr], mapped_location,
                        mapped_location + g_len, mapped_strand)

                    r_aln, g_aln = cigar_to_alignment(cigar, original_BS,
                                                      origin_genome)

                    if len(r_aln) == len(g_aln):
                        N_mismatch = N_MIS(r_aln, g_aln)
                        #                        if N_mismatch <= int(max_mismatch_no):
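                        # max_mismatch_no >= 1 is treated as an absolute mismatch cap; a value
                        # below 1 is treated as a fraction of the aligned read length.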
                        mm_no = float(max_mismatch_no)
                        if (mm_no >= 1 and N_mismatch <= mm_no) or (
                                mm_no < 1 and N_mismatch <=
                            (mm_no * len(r_aln))):
                            numbers_mapped_lst[nn - 1] += 1
                            all_mapped_passed += 1
                            methy = methy_seq(r_aln, g_aln + next)
                            mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)

                            #---XS FILTER----------------
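                            # With XSteve, a run of three consecutive methylated CH calls ('ZZZ')
                            # flags the read; otherwise the read is flagged when the number of
                            # methylated CH calls exceeds XS_count and their fraction of all CH
                            # calls exceeds XS_pct.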
                            XS = 0
                            if XSteve:
                                if ('ZZZ' in methy.translate(None, "-XY")):
                                    XS = 1
                                #
                            else:
                                nCH = methy.count('y') + methy.count('z')
                                nmCH = methy.count('Y') + methy.count('Z')
                                if ((nmCH > XS_count)
                                        and nmCH / float(nCH + nmCH) > XS_pct):
                                    XS = 1
                                #
                            #
                            outfile.store(header,
                                          N_mismatch,
                                          FR,
                                          mapped_chr,
                                          mapped_strand,
                                          mapped_location,
                                          cigar,
                                          original_BS,
                                          methy,
                                          XS,
                                          output_genome=output_genome)
                            all_base_mapped += len(original_BS)

            #----------------------------------------------------------------
            logm("--> %s (%d) " % (read_file, no_my_files))
            delete_files(WC2T, WG2A, CC2T, CG2A)

        #--------------------------------------------------------------------
        # directional sequencing
        #--------------------------------------------------------------------

        if asktag == "N":
            #----------------------------------------------------------------
            outfile2 = tmp_d('Trimmed_C2T.fa' + random_id)
            outf2 = open(outfile2, 'w')
            #----------------------------------------------------------------
            try:
                if read_file.endswith(
                        ".gz"):  # support input file ending with ".gz"
                    read_inf = gzip.open(read_file, "rb")
                else:
                    read_inf = open(read_file, "r")
            except IOError:
                print "[Error] Cannot open input file : %s" % read_file
                exit(-1)

            logm("Start reading and trimming the input sequences")
            oneline = read_inf.readline()
            if oneline == "":
                oneline = "NNNN"
            l = oneline.split()
            input_format = ""
            if oneline[0] == "@":
                input_format = "fastq"
            elif len(l) == 1 and oneline[0] != ">":
                input_format = "seq"
            elif len(l) == 11:
                input_format = "qseq"
            elif oneline[0] == ">":
                input_format = "fasta"
            read_inf.close()

            #print "detected data format: %s"%(input_format)
            #----------------------------------------------------------------
            read_id = ""
            seq = ""
            seq_ready = "N"
            line_no = 0
            for line in fileinput.input(read_file,
                                        openhook=fileinput.hook_compressed):
                if l == "":
                    l = "NNNN"
                l = line.split()
                line_no += 1
                if input_format == "seq":
                    all_raw_reads += 1
                    read_id = str(all_raw_reads)
                    read_id = read_id.zfill(12)
                    seq = l[0]
                    seq_ready = "Y"
                elif input_format == "fastq":
                    l_fastq = math.fmod(line_no, 4)
                    if l_fastq == 1:
                        all_raw_reads += 1
                        read_id = l[0][1:]
                        seq_ready = "N"
                    elif l_fastq == 2:
                        seq = l[0]
                        seq_ready = "Y"
                    else:
                        seq = ""
                        seq_ready = "N"
                elif input_format == "qseq":
                    all_raw_reads += 1
                    read_id = str(all_raw_reads)
                    read_id = read_id.zfill(12)
                    seq = l[8]
                    seq_ready = "Y"
                elif input_format == "fasta":
                    l_fasta = math.fmod(line_no, 2)
                    if l_fasta == 1:
                        all_raw_reads += 1
                        read_id = l[0][1:]
                        seq = ""
                        seq_ready = "N"
                    elif l_fasta == 0:
                        seq = l[0]
                        seq_ready = "Y"
                #--------------------------------
                if seq_ready == "Y":
                    seq = seq[cut1 -
                              1:cut2]  #<---selecting 0..52 from 1..72  -e 52
                    seq = seq.upper()
                    seq = seq.replace(".", "N")

                    #--striping adapter from 3' read -------
                    all_base_before_trim += len(seq)
                    if adapter != "":
                        new_read = RemoveAdapter(seq, adapter,
                                                 adapter_mismatch)
                        if len(new_read) < len(seq):
                            all_trimmed += 1
                        seq = new_read
                    all_base_after_trim += len(seq)
                    if len(seq) <= 4:
                        seq = "N" * (cut2 - cut1 + 1)

                    #---------  trimmed_raw_BS_read  ------------------
                    original_bs_reads[read_id] = seq

                    #---------  FW_C2T  ------------------
                    outf2.write('>%s\n%s\n' % (read_id, seq.replace("C", "T")))

            fileinput.close()

            outf2.close()
            delete_files(read_file)

            #--------------------------------------------------------------------------------
            # Bowtie mapping
            #--------------------------------------------------------------------------------
            logm("Start mapping")
            WC2T = tmp_d("W_C2T_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)
            CC2T = tmp_d("C_C2T_m" + str(max_mismatch_no) + ".mapping" +
                         random_id)

            run_in_parallel([
                aligner_command % {
                    'reference_genome': os.path.join(db_path, 'W_C2T'),
                    'input_file': outfile2,
                    'output_file': WC2T
                }, aligner_command % {
                    'reference_genome': os.path.join(db_path, 'C_C2T'),
                    'input_file': outfile2,
                    'output_file': CC2T
                }
            ])

            delete_files(outfile2)

            #--------------------------------------------------------------------------------
            # Post processing
            #--------------------------------------------------------------------------------

            FW_C2T_U, FW_C2T_R = extract_mapping(WC2T)
            RC_C2T_U, RC_C2T_R = extract_mapping(CC2T)

            #----------------------------------------------------------------
            # get unique-hit reads
            #----------------------------------------------------------------
            Union_set = set(FW_C2T_U.iterkeys()) | set(RC_C2T_U.iterkeys())

            Unique_FW_C2T = set()  # +
            Unique_RC_C2T = set()  # -
            Multiple_hits = set()
            # write reads rejected by Multiple Hits to file

            for x in Union_set:
                _list = []
                for d in [FW_C2T_U, RC_C2T_U]:
                    mis_lst = d.get(x, [99])
                    mis = int(mis_lst[0])
                    _list.append(mis)
                for d in [FW_C2T_R, RC_C2T_R]:
                    mis = d.get(x, 99)
                    _list.append(mis)
                mini = min(_list)
                #print _list
                if _list.count(mini) == 1:
                    mini_index = _list.index(mini)
                    if mini_index == 0:
                        Unique_FW_C2T.add(x)
                    elif mini_index == 1:
                        Unique_RC_C2T.add(x)
                    else:
                        Multiple_hits.add(x)
                else:
                    Multiple_hits.add(x)
            # write reads rejected by Multiple Hits to file
            if show_multiple_hit is not None:
                #outf_MH=open(show_multiple_hit,'w')
                for i in Multiple_hits:
                    outf_MH.write(">%s\n" % i)
                    outf_MH.write("%s\n" % original_bs_reads[i])
                #outf_MH.close()

            # write unmapped reads to file
            if show_unmapped_hit is not None:
                #outf_UH=open(show_unmapped_hit,'w')
                for i in original_bs_reads:
                    if i not in Union_set:
                        outf_UH.write(">%s\n" % i)
                        outf_UH.write("%s\n" % original_bs_reads[i])
                #outf_UH.close()

            FW_C2T_uniq_lst = [[FW_C2T_U[u][1], u] for u in Unique_FW_C2T]
            RC_C2T_uniq_lst = [[RC_C2T_U[u][1], u] for u in Unique_RC_C2T]
            FW_C2T_uniq_lst.sort()
            RC_C2T_uniq_lst.sort()
            FW_C2T_uniq_lst = [x[1] for x in FW_C2T_uniq_lst]
            RC_C2T_uniq_lst = [x[1] for x in RC_C2T_uniq_lst]

            #----------------------------------------------------------------

            numbers_premapped_lst[0] += len(Unique_FW_C2T)
            numbers_premapped_lst[1] += len(Unique_RC_C2T)

            #----------------------------------------------------------------

            nn = 0
            gseq = dict()
            chr_length = dict()
            for ali_unique_lst, ali_dic in [(FW_C2T_uniq_lst, FW_C2T_U),
                                            (RC_C2T_uniq_lst, RC_C2T_U)]:
                nn += 1
                for header in ali_unique_lst:
                    _, mapped_chr, mapped_location, cigar = ali_dic[header]
                    original_BS = original_bs_reads[header]
                    #-------------------------------------
                    if mapped_chr not in gseq:
                        gseq[mapped_chr] = deserialize(db_d(mapped_chr))
                        chr_length[mapped_chr] = len(gseq[mapped_chr])

                    r_start, r_end, g_len = get_read_start_end_and_genome_length(
                        cigar)

                    all_mapped += 1
                    if nn == 1:  # +FW mapped to + strand:
                        FR = "+FW"
                        mapped_strand = "+"
                    elif nn == 2:  # -FW mapped to - strand:
                        mapped_strand = "-"
                        FR = "-FW"
                        mapped_location = chr_length[
                            mapped_chr] - mapped_location - g_len

                    origin_genome, next, output_genome = get_genomic_sequence(
                        gseq[mapped_chr], mapped_location,
                        mapped_location + g_len, mapped_strand)
                    r_aln, g_aln = cigar_to_alignment(cigar, original_BS,
                                                      origin_genome)

                    if len(r_aln) == len(g_aln):
                        N_mismatch = N_MIS(
                            r_aln, g_aln
                        )  #+ original_BS_length - (r_end - r_start) # mismatches in the alignment + soft clipped nucleotides
                        mm_no = float(max_mismatch_no)
                        if (mm_no >= 1 and N_mismatch <= mm_no) or (
                                mm_no < 1 and N_mismatch <=
                            (mm_no * len(r_aln))):
                            numbers_mapped_lst[nn - 1] += 1
                            all_mapped_passed += 1
                            methy = methy_seq(r_aln, g_aln + next)
                            mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)

                            #---XS FILTER----------------
                            XS = 0
                            if XSteve:
                                if ('ZZZ' in methy.translate(None, "-XY")):
                                    XS = 1
                                #
                            else:
                                nCH = methy.count('y') + methy.count('z')
                                nmCH = methy.count('Y') + methy.count('Z')
                                if ((nmCH > XS_count)
                                        and nmCH / float(nCH + nmCH) > XS_pct):
                                    XS = 1
                                #
                            #
                            outfile.store(header,
                                          N_mismatch,
                                          FR,
                                          mapped_chr,
                                          mapped_strand,
                                          mapped_location,
                                          cigar,
                                          original_BS,
                                          methy,
                                          XS,
                                          output_genome=output_genome)
                            all_base_mapped += len(original_BS)

            #----------------------------------------------------------------
            logm("--> %s (%d) " % (read_file, no_my_files))
            delete_files(WC2T, CC2T)

    #----------------------------------------------------------------

    delete_files(tmp_path)

    if show_multiple_hit is not None:
        outf_MH.close()

    if show_unmapped_hit is not None:
        outf_UH.close()

    logm("----------------------------------------------")
    logm("Number of raw reads: %d" % all_raw_reads)
    if all_raw_reads > 0:
        logm("Number of bases in total: %d " % all_base_before_trim)
        if (asktag == "N" and adapter != "") or (asktag == "Y"
                                                 and adapter_fw != ""):
            logm("Number of reads having adapter removed: %d" % all_trimmed)
            trim_percent = (
                float(all_base_after_trim) /
                all_base_before_trim) if all_base_before_trim > 0 else 0
            logm("Number of bases after trimming the adapters: %d (%1.3f)" %
                 (all_base_after_trim, trim_percent))
        #
        logm("Number of reads are rejected because of multiple hits: %d" %
             len(Multiple_hits))
        logm("Number of unique-hits reads (before post-filtering): %d" %
             all_mapped)
        if asktag == "Y":
            logm(
                "  %7d FW reads mapped to Watson strand (before post-filtering)"
                % (numbers_premapped_lst[0]))
            logm(
                "  %7d RC reads mapped to Watson strand (before post-filtering)"
                % (numbers_premapped_lst[1]))
            logm(
                "  %7d FW reads mapped to Crick strand (before post-filtering)"
                % (numbers_premapped_lst[2]))
            logm(
                "  %7d RC reads mapped to Crick strand (before post-filtering)"
                % (numbers_premapped_lst[3]))
        elif asktag == "N":
            logm(
                "  %7d FW reads mapped to Watson strand (before post-filtering)"
                % (numbers_premapped_lst[0]))
            logm(
                "  %7d FW reads mapped to Crick strand (before post-filtering)"
                % (numbers_premapped_lst[1]))

        logm("Post-filtering %d uniquely aligned reads with mismatches <= %s" %
             (all_mapped_passed, max_mismatch_no))
        if asktag == "Y":
            logm("  %7d FW reads mapped to Watson strand" %
                 (numbers_mapped_lst[0]))
            logm("  %7d RC reads mapped to Watson strand" %
                 (numbers_mapped_lst[1]))
            logm("  %7d FW reads mapped to Crick strand" %
                 (numbers_mapped_lst[2]))
            logm("  %7d RC reads mapped to Crick strand" %
                 (numbers_mapped_lst[3]))
        elif asktag == "N":
            logm("  %7d FW reads mapped to Watson strand" %
                 (numbers_mapped_lst[0]))
            logm("  %7d FW reads mapped to Crick strand" %
                 (numbers_mapped_lst[1]))
        Mappability = (100 * float(all_mapped_passed) /
                       all_raw_reads) if all_raw_reads > 0 else 0
        logm("Mappability = %1.4f%%" % Mappability)
        logm("Total bases of uniquely mapped reads : %7d" % all_base_mapped)
        #
        n_CG = mC_lst[0] + uC_lst[0]
        n_CHG = mC_lst[1] + uC_lst[1]
        n_CHH = mC_lst[2] + uC_lst[2]
        #
        logm("----------------------------------------------")
        logm("Methylated C in mapped reads ")
        #
        logm(" mCG  %1.3f%%" %
             ((100 * float(mC_lst[0]) / n_CG) if n_CG != 0 else 0))
        logm(" mCHG %1.3f%%" %
             ((100 * float(mC_lst[1]) / n_CHG) if n_CHG != 0 else 0))
        logm(" mCHH %1.3f%%" %
             ((100 * float(mC_lst[2]) / n_CHH) if n_CHH != 0 else 0))
        #
    logm("----------------------------------------------")
    logm("File : %s" % main_read_file)
    elapsed("Resource / CPU time")
    logm("------------------- END --------------------")
    close_log()
예제 #52
0
def ignoreORGs(data):
    for line in fileinput.input(data):
        ORGS_to_ignore.append(line.strip())
    fileinput.close()
예제 #53
0
def rec_cnt(fqfn):
    for rec in fileinput.input(fqfn):
        pass
    rec_cnt = fileinput.lineno()
    fileinput.close()
    return rec_cnt
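# Usage sketch (hypothetical file name): rec_cnt("reads.fq") walks the file and returns
# fileinput.lineno(), i.e. the total number of lines read.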
예제 #54
0
def write_after_line(line_search, new_text, filepath):
    for line in fileinput.input(filepath, inplace=True):
        print(line.rstrip("\n"))
        if line_search in line.strip():
            print(new_text)
    fileinput.close()
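# Usage sketch (hypothetical arguments): write_after_line("[settings]", "debug = true", "app.ini")
# rewrites app.ini in place, echoing every line and printing the new text after each match.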
예제 #55
0
def _replace_in_file(filename, old, new):
    """ Replaces old with new in file filename. """
    for line in fileinput.FileInput(filename, inplace=1):
        line = line.replace(old, new)
        print(line, end='')
    fileinput.close()
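# Note: with inplace=1, stdout is redirected into the file, so the print() call above
# rewrites the file line by line with the substituted text.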
def main():
	parser = OptionParser(usage="usage: %prog [options] json_file1 json_file2 ...")
	parser.add_option("-t", "--top", action="store", type="int", dest="top", help="number of top pairs to display", default=10)
	parser.add_option("-o", action="store", type="string", dest="out_path", help="output path", default="hashtag-cooccurrences.csv")
	(options, args) = parser.parse_args()	
	if( len(args) < 1 ):
		parser.error( "Must specify at least one JSONL file" )
	log.basicConfig(level=20, format='%(message)s')

	# Count pairs of hashtags in the same tweet
	pair_counts = defaultdict(int)
	for tweets_path in args:
		log.info("Loading tweets from %s ..." % tweets_path)
		# Process every line as JSON data
		hashtags = {}
		num_tweets, num_failed, line_number = 0, 0, 0
		num_multiple = 0
		for l in fileinput.input(tweets_path):
			l = l.strip()
			if len(l) == 0:
				continue
			try:
				line_number += 1
				tweet = json.loads(l)
				tweet_tags = set()
				# find the tags
				if "entities" in tweet:
					if "hashtags" in tweet["entities"] and len(tweet["entities"]["hashtags"]) > 0:
						for tag in tweet["entities"]["hashtags"]:
							tweet_tags.add( "#" + tag["text"].lower().strip() )
				# do not count duplicates
				tweet_tags = list(tweet_tags)
				# process the pairs
				if len(tweet_tags) > 1:
					num_multiple += 1
					for p in itertools.combinations(tweet_tags, 2):
						if p[0] < p[1]:
							pair = frozenset( [p[0],p[1]] )
						else:
							pair = frozenset( [p[1],p[0]] )
						pair_counts[pair] += 1
				num_tweets += 1
				if line_number % 50000 == 0:
					log.info("Processed %d lines" % line_number)
			except Exception as e:
				log.error("Failed to parse tweet on line %d: %s" % ( line_number, e ) )
				num_failed += 1
		fileinput.close()
		log.info("Processed %d tweets from file" % num_tweets )
		log.info("%d/%d tweets in file contained more than one hashtag" % ( num_multiple, num_tweets ) )
	log.info("Total of %d unique pairs of hashtags" % len(pair_counts) )

	# Output pairs
	log.info("Writing pairs to %s ..." % options.out_path )
	fout = codecs.open( options.out_path, "w", encoding="utf-8", errors="ignore" )
	fout.write("Hashtag1\tHastag2\tCount\n")
	for p in pair_counts:
		pair = list(p)
		pair.sort()
		fout.write( "%s\t%s\t%d\n" % ( pair[0], pair[1], pair_counts[p] )  )
	fout.close()

	# Display top counts
	sx = sorted(pair_counts.items(), key=operator.itemgetter(1), reverse=True)
	log.info("Top %d co-occurring hashtag pairs:" % min( len(sx), options.top ) )
	tab = PrettyTable( ["Hashtag1", "Hashtag2", "Count"] )
	tab.align["Hashtag1"] = "l"
	tab.align["Hashtag2"] = "l"
	tab.align["Count"] = "r"
	for i, p in enumerate(sx):
		if i >= options.top:
			break
		pair = list(p[0])
		pair.sort()
		tab.add_row( [pair[0], pair[1], p[1]] )
	log.info(tab)		
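# Example invocation (hypothetical script and file names):
#   python hashtag_cooccurrence.py -t 20 -o pairs.csv tweets_day1.jsonl tweets_day2.jsonl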
def func2():
    with open("../data/feature/trace_all_statistic_filter","w") as f:
        for line in fileinput.input("../data/feature/trace_all_statistic"):
            part = line.strip().split(" ")
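            # part[1] is a Chinese gender label: "男性" (male) -> 0, "女性" (female) -> 1.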
            mac, sex, tot, feat = part[0], {"男性":0, "女性":1}[part[1]], part[2], part[3:]
            array_0, array_p, array_1, array_2, array_3, array_4, array_5, array_6, array_7, array_8, array_9, array_10 = {}, {}, [], [], [], [], [], [], [], [], [], []
            for one in feat:
                objs = one.split("@")
                chars, ints = objs[0].split("+"), objs[1].split(",")
                if len(chars) == 1:
                    if chars[0] in ["WD","WE","A","B","C","D","0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                        array_0[chars[0]] = ints[1]+","+str(int(float(ints[2])*1000))+","+str(int(float(ints[3])*1000))+","+ints[4]+","+ints[5]+","+ints[6]+","+ints[7]+","+str(int(float(ints[8])*1000))
                    else:
                        array_p[chars[0]] = str(int(float(ints[2])*1000))+","+str(int(float(ints[3])*1000))+","+ints[4]+","+ints[5]+","+ints[6]+","+ints[7]+","+str(int(float(ints[8])*1000))
                if len(chars) == 2 and chars[0] in ["WD","WE"] and chars[1] in ["A","B","C","D"]:
                    array_0["+".join(chars)] = str(int(float(ints[2])*1000))+","+str(int(float(ints[3])*1000))+","+ints[4]+","+ints[5]+","+ints[6]+","+ints[7]+","+str(int(float(ints[8])*1000))
                if len(chars) == 2 and chars[0] in ["WD","WE"] and chars[1] in ["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                    array_0["+".join(chars)] = str(int(float(ints[2])*1000))+","+str(int(float(ints[3])*1000))+","+ints[4]+","+ints[5]+","+ints[6]+","+ints[7]+","+str(int(float(ints[8])*1000))
                if len(chars) == 2 and chars[1] in ["WD","WE"]:
                    if chars[0] in ["Acad","Adm","Ath","Cant","Hosp","Lib","Soc","Supp","Teach","Other"]:
                        array_1.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                    else:
                        array_2.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                if len(chars) == 2 and chars[1] in ["A","B","C","D"]:
                    if chars[0] in ["Acad","Adm","Ath","Cant","Hosp","Lib","Soc","Supp","Teach","Other"]:
                        array_3.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                    else:
                        array_4.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                if len(chars) == 2 and chars[1] in ["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                    if chars[0] in ["Acad","Adm","Ath","Cant","Hosp","Lib","Soc","Supp","Teach","Other"]:
                        array_5.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                    else:
                        array_6.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                if len(chars) == 3 and chars[1] in ["WD","WE"] and chars[2] in ["A","B","C","D"]:
                    if chars[0] in ["Acad","Adm","Ath","Cant","Hosp","Lib","Soc","Supp","Teach","Other"]:
                        array_7.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                    else:
                        array_8.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                if len(chars) == 3 and chars[1] in ["WD","WE"] and chars[2] in ["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                    if chars[0] in ["Acad","Adm","Ath","Cant","Hosp","Lib","Soc","Supp","Teach","Other"]:
                        array_9.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
                    else:
                        array_10.append({'k':"+".join(chars),'cnt':int(float(ints[2])*1000),'sum':int(float(ints[3])*1000),'min':int(ints[4]),'max':int(ints[5]),'avg':int(ints[6]),'med':int(ints[7]),'std':int(float(ints[8])*1000)})
            array_1_cnt, array_1_avg = sorted(array_1, key=lambda k: k['cnt'], reverse = True)[0:10],  sorted(array_1, key=lambda k: k['avg'], reverse = True)[0:10]
            for tmp in array_1_cnt:
                if not tmp in array_1_avg:
                    array_1_avg.append(tmp)
            array_2_cnt, array_2_avg = sorted(array_2, key=lambda k: k['cnt'], reverse = True)[0:30], sorted(array_2, key=lambda k: k['avg'], reverse = True)[0:30]
            for tmp in array_2_cnt:
                if not tmp in array_2_avg:
                    array_2_avg.append(tmp)
            array_3_cnt, array_3_avg = sorted(array_3, key=lambda k: k['cnt'], reverse = True)[0:10],  sorted(array_3, key=lambda k: k['avg'], reverse = True)[0:10]
            for tmp in array_3_cnt:
                if not tmp in array_3_avg:
                    array_3_avg.append(tmp)
            array_4_cnt, array_4_avg = sorted(array_4, key=lambda k: k['cnt'], reverse = True)[0:30], sorted(array_4, key=lambda k: k['avg'], reverse = True)[0:30]
            for tmp in array_4_cnt:
                if not tmp in array_4_avg:
                    array_4_avg.append(tmp)
            array_5_cnt, array_5_avg = sorted(array_5, key=lambda k: k['cnt'], reverse = True)[0:10],  sorted(array_5, key=lambda k: k['avg'], reverse = True)[0:10]
            for tmp in array_5_cnt:
                if not tmp in array_5_avg:
                    array_5_avg.append(tmp)
            array_6_cnt, array_6_avg = sorted(array_6, key=lambda k: k['cnt'], reverse = True)[0:30], sorted(array_6, key=lambda k: k['avg'], reverse = True)[0:30]
            for tmp in array_6_cnt:
                if not tmp in array_6_avg:
                    array_6_avg.append(tmp)
            array_7_cnt, array_7_avg = sorted(array_7, key=lambda k: k['cnt'], reverse = True)[0:10],  sorted(array_7, key=lambda k: k['avg'], reverse = True)[0:10]
            for tmp in array_7_cnt:
                if not tmp in array_7_avg:
                    array_7_avg.append(tmp)
            array_8_cnt, array_8_avg = sorted(array_8, key=lambda k: k['cnt'], reverse = True)[0:30], sorted(array_8, key=lambda k: k['avg'], reverse = True)[0:30]
            for tmp in array_8_cnt:
                if not tmp in array_8_avg:
                    array_8_avg.append(tmp)
            array_9_cnt, array_9_avg = sorted(array_9, key=lambda k: k['cnt'], reverse = True)[0:10],  sorted(array_9, key=lambda k: k['avg'], reverse = True)[0:10]
            for tmp in array_9_cnt:
                if not tmp in array_9_avg:
                    array_9_avg.append(tmp)
            array_10_cnt, array_10_avg = sorted(array_10, key=lambda k: k['cnt'], reverse = True)[0:30], sorted(array_10, key=lambda k: k['avg'], reverse = True)[0:30]
            for tmp in array_10_cnt:
                if not tmp in array_10_avg:
                    array_10_avg.append(tmp)
            array = []
            for key in ["WD","WE","A","B","C","D","0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                if array_0.has_key(key):
                    array.append(key+"@"+array_0[key])
                else:
                    array.append(key+"@0,0,0,0,0,0,0,0")
            for key1 in ["WD","WE"]:
                for key2 in ["A","B","C","D"]:
                    key = key1+"+"+key2
                    if array_0.has_key(key):
                        array.append(key+"@"+array_0[key])
                    else:
                        array.append(key+"@0,0,0,0,0,0,0")
            for key1 in ["WD","WE"]:
                for key2 in ["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23"]:
                    key = key1+"+"+key2
                    if array_0.has_key(key):
                        array.append(key+"@"+array_0[key])
                    else:
                        array.append(key+"@0,0,0,0,0,0,0")
            for k,v in array_p.iteritems():
                array.append(k+'@'+v)
            for one in array_1_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_2_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_3_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_4_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_5_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_6_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_7_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_8_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_9_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            for one in array_10_avg:
                array.append(one['k']+"@"+str(one['cnt'])+","+str(one['sum'])+","+str(one['min'])+","+str(one['max'])+","+str(one['avg'])+","+str(one['med'])+","+str(one['std']))
            f.write(mac+' '+str(sex)+' '+tot+' '+" ".join(array)+'\n')
        fileinput.close()
예제 #58
0
def load_file(fn: str) -> List[str]:
    out_recs = []
    for rec in fileinput.input(fn):
        out_recs.append(rec)
    fileinput.close()
    return out_recs
def func1():
    class Stats:
        def __init__(self, sequence):
            self.sequence = [int(item) for item in sequence]
        def sum(self):
            if len(self.sequence) < 1:
                return None
            else:
                return sum(self.sequence)
        def cnt(self):
            return len(self.sequence)
        def min(self):
            if len(self.sequence) < 1:
                return None
            else:
                return min(self.sequence)
        def max(self):
            if len(self.sequence) < 1:
                return None
            else:
                return max(self.sequence)
        def avg(self):
            if len(self.sequence) < 1:
                return None
            else:
                return sum(self.sequence) / len(self.sequence)    
        def med(self):
            if len(self.sequence) < 1:
                return None
            else:
                self.sequence.sort()
                return self.sequence[len(self.sequence) // 2]
        def std(self):
            if len(self.sequence) < 1:
                return None
            else:
                avg = self.avg()
                sdsq = sum([(i - avg) ** 2 for i in self.sequence])
                stdev = (sdsq / (len(self.sequence) - 1)) ** .5
                return stdev

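    # mapping: MAC -> feature key -> day -> list of positive durations for that day;
    # glob:    MAC -> day -> total duration observed on that day.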
    mapping, glob = {}, {}
    for line in gzip.open("../data/feature/trace_all.gz"):
    	part = line.strip().split(" ")
    	mac, dy, wd, ss, hr, cl, bd, dr = part[0].replace(":",""), part[1], part[2], part[3], part[4], part[5], part[6], int(part[8])
    	wd = {'1':'WD','2':'WE'}[wd]
    	ss = {'1':'A','2':'B','3':'C','4':'D'}[ss]
    	if not mapping.has_key(mac):
    		mapping[mac] = {}
    	if not glob.has_key(mac):
    		glob[mac] = {}
    	if not glob[mac].has_key(dy):
    		glob[mac][dy] = 0
    	glob[mac][dy] = glob[mac][dy]+dr
    	array = [wd, ss, hr, cl, bd, wd+'+'+ss, wd+'+'+hr,\
                cl+'+'+wd, cl+'+'+ss, cl+'+'+hr, cl+'+'+wd+'+'+ss, cl+'+'+wd+'+'+hr,\
                bd+'+'+wd, bd+'+'+ss, bd+'+'+hr, bd+'+'+wd+'+'+ss, bd+'+'+wd+'+'+hr]
    	for one in array:
    		if not mapping[mac].has_key(one):
    			mapping[mac][one] = {}
    		if not mapping[mac][one].has_key(dy):
    			mapping[mac][one][dy] = []
    		if dr>0:
    			mapping[mac][one][dy].append(dr)

    jac = {}
    for line in fileinput.input("../data/jaccount/jaccount_taged"):
        part = line.strip().split(" ")
        dev, mac, sex = part[0], part[1], part[2]
        if dev == "mobile":
            jac[mac] = {'sex':sex}
    fileinput.close()

    with open('../data/feature/trace_all_statistic', 'w') as f:
        for k,v in mapping.iteritems():
            if jac.has_key(k):
                array = []
                for x,y in glob[k].iteritems():
                    array.append(y)
                if len(array) >= 2:
                    stats = Stats(array)
                    _cnt, _sum, _min, _max, _avg, _med, _std = stats.cnt(), stats.sum(), stats.min(), stats.max(), stats.avg(), stats.med(), stats.std()
                    f.write(k+' '+jac[k]['sex']+' tot@'+str(_cnt)+','+str(_sum)+','+str(_min)+','+str(_max)+','+str(_avg)+','+str(_med)+','+str(int(_std)))
                    for p,q in v.iteritems():
                        array = []
                        for x,y in q.iteritems():
                            array.append(sum(y))
                        if len(array) >= 2:
                            stats = Stats(array)
                            f.write(' '+p+'@'+str(stats.cnt())+','+str(stats.sum())+',%.4f,'%(float(stats.cnt())/_cnt)+'%.4f,'%(float(stats.sum())/_sum)+str(stats.min())+','+str(stats.max())+','+str(stats.avg())+','+str(stats.med())+','+str(int(stats.std())))
                    f.write('\n')
예제 #60
0
def edit_ossec_conf():
    rule_elements = []

    # Ignore elements (legacy elements and default elements)
    ignore_elements = [
        '<!--', '<decoder>etc/decoder.xml', '<decoder>etc/local_decoder.xml',
        '<decoder_dir>etc/decoders', '<decoder_dir>etc/ossec_decoders',
        '<decoder_dir>etc/wazuh_decoders', '<include>local_rules.xml'
    ]

    # Template
    template_file = open("{0}/rules.template".format(source_rules_path), 'r')
    include_template = template_file.readlines()
    include_template = include_template[
        3:-2]  # Remove the first 3 lines and the last 2 lines
    template_file.close()

    # Remove "<rules>*</rules>" and "...ossec_config>  <!-- rules global entry -->"
    inside_rules = False
    for line in fileinput.input(ossec_conf, inplace=True):
        if '<rules>' in line.strip():
            inside_rules = True
            continue  # Remove line
        elif '</rules>' in line.strip():
            inside_rules = False
            continue  # Remove line
        elif 'rules global entry' in line.strip():
            continue  # Remove line
        else:
            if inside_rules:
                if any(ignore_element in line.strip()
                       for ignore_element in ignore_elements):
                    continue
                else:
                    rule_elements.append(line)  # Save rule element
            else:
                print(line.rstrip("\n"))  # Keep line
    fileinput.close()

    # Custom items in <rules>
    custom_decoder = []
    custom_include = []
    custom_list = []
    custom_rule_dir = []

    for rule_element in rule_elements:
        if '<decoder' in rule_element:
            custom_decoder.append(rule_element)
        elif '<list>' in rule_element:
            custom_list.append(rule_element)
        elif '<rule_dir>' in rule_element:
            custom_rule_dir.append(rule_element)
        elif '<include>' in rule_element:
            m = search('<include>(.+_rules.xml)', rule_element)
            if m:
                rule = m.group(1)
                if any(rule in include_r for include_r in include_template):
                    continue
                else:
                    custom_include.append(rule_element)

    # Write file
    with open(ossec_conf, "a") as conf_file:
        conf_file.write("<ossec_config>  <!-- rules global entry -->\n")
        conf_file.write("  <rules>\n")
        conf_file.write("    <decoder_dir>etc/decoders</decoder_dir>\n")
        for c_d in custom_decoder:
            conf_file.write(c_d)
        conf_file.write("    <decoder>etc/local_decoder.xml</decoder>\n")
        for c_l in custom_list:
            conf_file.write(c_l)
        for i_t in include_template:
            conf_file.write(i_t)
        for c_r in custom_rule_dir:
            conf_file.write(c_r)
        for c_i in custom_include:
            conf_file.write(c_i)
        conf_file.write("    <include>local_rules.xml</include>\n")
        conf_file.write("  </rules>\n")
        conf_file.write("</ossec_config>  <!-- rules global entry -->\n")

    os.chown(ossec_conf, root_uid, ossec_gid)